File: id3pic.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from struct import unpack
  27 from sys import argv, exit, stderr, stdin, stdout
  28 
  29 
  30 info = '''
  31 id3pic [options...] [filepath/URI...]
  32 
  33 
  34 Extract picture/thumbnail bytes from ID3/MP3 metadata, when available.
  35 
  36 Any leading options can start with either single or double-dash:
  37 
  38     -h          show this help message
  39     -help       show this help message
  40 '''
  41 
  42 # handle standard help cmd-line options, quitting right away in that case
  43 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  44     print(info.strip(), file=stderr)
  45     exit(0)
  46 
  47 
  48 def seems_url(s: str) -> bool:
  49     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
  50     return any(s.startswith(p) for p in protocols)
  51 
  52 
  53 def read_byte(src) -> int:
  54     b = src.read(1)
  55     return b[0] if b else -1
  56 
  57 
  58 def skip_zstring(src) -> int:
  59     return match_byte(src, 0)
  60 
  61 
  62 def skip_thumbnail_type_apic(src) -> int:
  63     if read_byte(src) < 0:
  64         raise Exception('failed to sync to thumbnail text-encoding')
  65     n = 1
  66 
  67     m = match_byte(src, ord('/'))
  68     if m < 0:
  69         raise Exception('failed to sync to thumbnail MIME-type')
  70     n += m
  71     m = skip_zstring(src)
  72     if m < 0:
  73         raise Exception('failed to sync to thumbnail MIME-type')
  74     n += m
  75 
  76     if read_byte(src) < 0:
  77         raise Exception('failed to sync to thumbnail picture type')
  78     n += 1
  79 
  80     m = skip_zstring(src)
  81     if m < 0:
  82         raise Exception('failed to sync to thumbnail comment')
  83     n += m
  84 
  85     return n
  86 
  87 
  88 def match_byte(src, what: int) -> int:
  89     n = 0
  90 
  91     while True:
  92         b = src.read(1)
  93         if not b:
  94             return -1
  95 
  96         n += 1
  97 
  98         if b[0] == what:
  99             return n
 100 
 101 
 102 def match_bytes(src, seq: bytes) -> int:
 103     i = 0
 104     n = 0
 105     end = len(seq)
 106 
 107     while True:
 108         b = src.read(1)
 109         if not b:
 110             return -1
 111 
 112         n += 1
 113 
 114         if b[0] == seq[i]:
 115             i += 1
 116         else:
 117             i = 0
 118 
 119         if i >= end:
 120             return n
 121 
 122 
 123 def handle_apic(w, src) -> bool:
 124     # section-size seems stored as 4 little-endian bytes
 125     chunk = src.read(4)
 126     if not chunk:
 127         raise Exception('failed to read thumbnail-payload size')
 128     size = unpack('>I', chunk)[0]
 129 
 130     n = skip_thumbnail_type_apic(src)
 131     if n < 0:
 132         raise Exception('failed to sync to start of thumbnail data')
 133     size -= n
 134 
 135     # copy all thumbnail bytes
 136     w.write(src.read(size))
 137     return True
 138 
 139 
 140 def handle_pic(w, src) -> bool:
 141     # http://www.unixgods.org/Ruby/ID3/docs/id3v2-00.html#PIC
 142 
 143     # thumbnail-payload-size seems stored as 3 big-endian bytes
 144     a = src.read(1)
 145     b = src.read(1)
 146     c = src.read(1)
 147     if not (a and b and c):
 148         raise Exception('failed to read thumbnail-payload size')
 149     size = (256 * 256) * a[0] + 256 * b[0] + c[0]
 150 
 151     # skip the text encoding
 152     src.read(5)
 153 
 154     # skip a null-delimited string
 155     while True:
 156         b = src.read(1)
 157         if not b:
 158             raise Exception('failed to read thumbnail-payload description')
 159 
 160         if b[0] == 0:
 161             break
 162 
 163     # copy all thumbnail bytes
 164     w.write(src.read(size))
 165     return True
 166 
 167 
 168 def handle_id3_picture(w, src) -> bool:
 169     a = ord('A')
 170     p = ord('P')
 171     i = ord('I')
 172     c = ord('C')
 173 
 174     while True:
 175         chunk = src.read(1)
 176         if not chunk:
 177             break
 178         v = chunk[0]
 179 
 180         if v == a:
 181             if src.read(1)[0] != p:
 182                 continue
 183             if src.read(1)[0] != i:
 184                 continue
 185             if src.read(1)[0] != c:
 186                 continue
 187             return handle_apic(w, src)
 188 
 189         if v == p:
 190             if src.read(1)[0] != i:
 191                 continue
 192             if src.read(1)[0] != c:
 193                 continue
 194             return handle_pic(w, src)
 195 
 196     return False
 197 
 198 
 199 def handle_bytes(w, src) -> None:
 200     if not handle_id3_picture(w, src):
 201         raise Exception('no thumbnail found')
 202 
 203 
 204 name = '-' if len(argv) == 1 else argv[1]
 205 if len(argv) > 2:
 206     print('\x1b[31mmultiple inputs not allowed\x1b[0m', file=stderr)
 207     exit(1)
 208 
 209 try:
 210     if seems_url(name):
 211         from urllib.request import urlopen
 212 
 213     # handle all named inputs given
 214     if name == '-':
 215         handle_bytes(stdout.buffer, stdin.buffer)
 216     elif seems_url(name):
 217         with urlopen(name) as inp:
 218             handle_bytes(stdout.buffer, inp)
 219     else:
 220         with open(name, mode='rb') as inp:
 221             handle_bytes(stdout.buffer, inp)
 222 except BrokenPipeError:
 223     # quit quietly, instead of showing a confusing error message
 224     stderr.close()
 225 except KeyboardInterrupt:
 226     exit(2)
 227 except Exception as e:
 228     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 229     exit(1)