File: si.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # si [options...]
  27 #
  28 # Show It shows data read from standard-input, using your default web browser
# by auto-opening tabs, auto-detecting the data-format, and using a random port
  30 # among those available.
  31 #
  32 # The localhost connection is available only until all data are transferred:
  33 # this means refreshing your browser tab will lose your content, replacing it
  34 # with a server-not-found message page.
  35 #
  36 # Dozens of common data-formats are recognized when piped from stdin, such as
  37 #
  38 #   - HTML (web pages)
  39 #   - PDF
  40 #   - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
  41 #   - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
  42 #   - video (MP4, MOV, WEBM, MKV, AVI)
  43 #   - JSON
  44 #   - generic UTF-8 plain-text
  45 
  46 
  47 from base64 import b64decode, b64encode
  48 from io import BytesIO
  49 from re import compile as compile_re, Pattern
  50 from socket import socket
  51 from sys import argv, exit, stderr, stdin
  52 from typing import Dict, List, Tuple
  53 from webbrowser import open_new_tab
  54 
  55 
  56 # info is the help message shown when asked to
  57 info = '''
  58 si [options...]
  59 
  60 Show It shows data read from standard-input, using your default web browser
  61 by auto-opening tabs, auto-detecing the data-format, and using a random port
  62 among those available.
  63 
  64 The localhost connection is available only until all data are transferred:
  65 this means refreshing your browser tab will lose your content, replacing it
  66 with a server-not-found message page.
  67 
  68 Dozens of common data-formats are recognized when piped from stdin, such as
  69 
  70   - HTML (web pages)
  71   - PDF
  72   - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
  73   - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
  74   - video (MP4, MOV, WEBM, MKV, AVI)
  75   - JSON
  76   - generic UTF-8 plain-text
  77 '''.strip()
  78 
  79 # handle standard help cmd-line options, quitting right away in that case
  80 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  81     print(info, file=stderr)
  82     exit(0)
  83 
  84 
# hdr_dispatch maps the first byte of some data to the list of candidate
# (leading byte-sequence, MIME-type) pairs which start with that byte, thus
# shortening total lookups for some data header: notice how the `ftyp` data
# formats aren't handled here, since these can include any byte in parts of
# their first few bytes
#
# each byte-sequence must start with the byte its list is keyed under: func
# check_mime_table verifies that when the script starts; candidates in each
# list are tried in order by func guess_mime, which picks the first match
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),
    ],

    0x1a: [
        # handle webm and MKV, which is practically like webm
        (b'\x1a\x45\xdf\xa3', 'video/webm'),
    ],

    0x23: [
        # data starting with `#!` are shown as plain-text
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],

    0x25: [
        (b'%PDF', 'application/pdf'),
        (b'%!PS', 'application/postscript'),
    ],

    0x2e: [
        (b'.snd', 'audio/basic'),
    ],

    0x47: [
        (b'GIF87a', 'image/gif'),
        (b'GIF89a', 'image/gif'),
    ],

    0x49: [
        # handle MP3 which start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),

        (b'II*\x00', 'image/tiff'),
    ],

    0x4d: [
        (b'MM\x00*', 'image/tiff'),
        (b'MThd', 'audio/midi'),
    ],

    0x4f: [
        # the opus audio-format is usually inside an ogg-format container
        (b'OggS', 'audio/ogg'),
    ],

    0x63: [
        (b'caff\x00\x01\x00\x00', 'audio/x-caf'),
    ],

    0x66: [
        (b'fLaC', 'audio/x-flac'),
    ],

    0x89: [
        (b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png'),
    ],

    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),

        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}
 160 
 161 
 162 # ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats
 163 ftyp_types: Tuple[Tuple[bytes, str]] = (
 164     (b'M4A ', 'audio/aac'),
 165     (b'M4A\x00', 'audio/aac'),
 166     (b'dash', 'audio/aac'),
 167     (b'isom', 'video/mp4'),
 168     # (b'isom', 'audio/aac'),
 169     (b'MSNV', 'video/mp4'),
 170     (b'qt  ', 'video/quicktime'),
 171     (b'heic', 'image/heic'),
 172     (b'avif', 'image/avif'),
 173 )
 174 
 175 # xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML
 176 xmlish_heuristics: Tuple[Tuple[bytes, str]] = (
 177     (b'<html>', 'text/html'),
 178     (b'<html ', 'text/html'),
 179     (b'<head>', 'text/html'),
 180     (b'<body>', 'text/html'),
 181     (b'<!DOCTYPE html>', 'text/html'),
 182     (b'<svg>', 'image/svg+xml'),
 183     (b'<svg ', 'image/svg+xml'),
 184     (b'<?xml', 'application/xml'),
 185 )
 186 
 187 # json_heuristics helps func guess_mime auto-detect JSON via regexes:
 188 # it's not perfect, but it seems effective-enough in practice
 189 json_heuristics: Tuple[Pattern] = (
 190     compile_re(b'''^\\s*\\{\\s*"'''),
 191     compile_re(b'''^\\s*\\{\\s*\\['''),
 192     compile_re(b'''^\\s*\\[\\s*"'''),
 193     compile_re(b'''^\\s*\\[\\s*\\{'''),
 194     compile_re(b'''^\\s*\\[\\s*\\['''),
 195 )
 196 
 197 
 198 def exact_match(header: bytes, maybe: bytes) -> bool:
 199     if len(header) < len(maybe):
 200         # not enough bytes to tell if input data match
 201         return False
 202     return all(x == y for x, y in zip(header, maybe))
 203 
 204 
 205 def match_riff(header: bytes) -> str:
 206     '''Handle a few special cases for func guess_mime.'''
 207 
 208     if len(header) < 12 or not header.startswith(b'RIFF'):
 209         return ''
 210 
 211     if header.find(b'WEBP', 8, 12) == 8:
 212         return 'image/webp'
 213     if header.find(b'WAVE', 8, 12) == 8:
 214         return 'audio/x-wav'
 215     if header.find(b'AVI ', 8, 12) == 8:
 216         return 'video/avi'
 217     return ''
 218 
 219 
 220 def match_form(header: bytes) -> str:
 221     '''Handle a few special cases for func guess_mime.'''
 222 
 223     if len(header) < 12 or not header.startswith(b'FORM'):
 224         return ''
 225 
 226     if header.find(b'AIFF', 8, 12) == 8:
 227         return 'audio/aiff'
 228     if header.find(b'AIFC', 8, 12) == 8:
 229         return 'audio/aiff'
 230     return ''
 231 
 232 
 233 def match_ftyp(header: bytes) -> str:
 234     '''Handle a few special cases for func guess_mime.'''
 235 
 236     # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
 237     if len(header) < 12 or header.find(b'ftyp', 4, 8) != 4:
 238         return ''
 239 
 240     # next 4 bytes after the ASCII 'ftyp' declare the data-format
 241     for marker, mime in ftyp_types:
 242         if header.find(marker, 8, 12) == 8:
 243             return mime
 244 
 245     # unrecognized MPEG-4-style data-format
 246     return ''
 247 
 248 
 249 def guess_mime(header: bytes, fallback: str) -> str:
 250     '''
 251     Try to auto-detect MIME-types for common file-types, given the first
 252     few bytes read from them.
 253     '''
 254 
 255     # no bytes, no match
 256     if len(header) == 0:
 257         return fallback
 258 
 259     # maybe it's one of the RIFF formats
 260     m = match_riff(header)
 261     if m != '':
 262         return m
 263 
 264     # maybe it's AIFF audio
 265     m = match_form(header)
 266     if m != '':
 267         return m
 268 
 269     # maybe it's an MPEG-4-like format
 270     m = match_ftyp(header)
 271     if m != '':
 272         return m
 273 
 274     # maybe it's a bitmap picture, which almost always has 40 on 15th byte
 275     if header.startswith(b'BM') and header.find(b'\x28', 8, 16) == 14:
 276         return 'image/x-bmp'
 277 
 278     # check general lookup-table
 279     if header[0] in hdr_dispatch:
 280         for maybe in hdr_dispatch[header[0]]:
 281             if exact_match(header, maybe[0]):
 282                 return maybe[1]
 283 
 284     # try HTML, SVG, and even XML
 285     if header.find(b'<', 0, 8) >= 0:
 286         for marker, mime in xmlish_heuristics:
 287             if header.find(marker, 0, 64) >= 0:
 288                 return mime
 289 
 290     # try some common cases for JSON
 291     for pattern in json_heuristics:
 292         if pattern.match(header):
 293             return 'application/json'
 294 
 295     # nothing matched
 296     return fallback
 297 
 298 
 299 def show_it(conn, start: bytes, rest) -> None:
 300     '''Handle both normal input and data-URIs.'''
 301 
 302     # handle base64-encoded data-URIs
 303     if start.startswith(b'data:'):
 304         i = start.find(b';base64,', 0, 64)
 305         if i > 0:
 306             mime_type = str(start[len('data:'):i], encoding='utf-8')
 307             encoded = BytesIO()
 308             encoded.write(start[i + len(';base64,'):])
 309             encoded.write(rest.read())
 310             decoded = b64decode(encoded.getvalue())
 311             encoded.close()
 312 
 313             inp = BytesIO(decoded)
 314             if mime_type == '':
 315                 start = inp.read(4096)
 316                 mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
 317                 show_it_as(conn, start, inp, mime_type)
 318             else:
 319                 show_it_as(conn, bytes(), inp, mime_type)
 320             return
 321 
 322     mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
 323     return show_it_as(conn, start, rest, mime_type)
 324 
 325 
 326 def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
 327     '''This is where the web-serving action happens.'''
 328 
 329     # read-ignore all client headers
 330     while True:
 331         if conn.recv(1024).endswith(b'\r\n\r\n'):
 332             break
 333 
 334     # web-browsers insist on auto-downloads when given wave or flac audio
 335     for e in ('audio/x-wav', 'audio/x-flac'):
 336         if e == mime_type:
 337             handle_sound_workaround(conn, mime_type, start, rest)
 338             return
 339 
 340     # web-browsers insist on auto-downloads when given bitmap pictures
 341     if mime_type == 'image/x-bmp':
 342         handle_image_workaround(conn, mime_type, start, rest)
 343         return
 344 
 345     # handle all other data formats
 346 
 347     conn.sendall(b'HTTP/1.1 200 OK\r\n')
 348     conn.sendall(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
 349     conn.sendall(b'Content-Disposition: inline\r\n')
 350     # tell browser this is the only/last request
 351     conn.sendall(b'Connection: close\r\n')
 352     # payload starts right after an empty line
 353     conn.sendall(b'\r\n')
 354 
 355     # send all input bytes
 356     conn.sendall(start)
 357     conn.sendfile(rest)
 358 
 359 
 360 def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
 361     data = BytesIO()
 362     pre = f'    <audio controls autofocus src="data:{mime};base64,'
 363 
 364     def emit_inner_body() -> None:
 365         conn.sendall(bytes(pre, encoding='utf-8'))
 366         data.write(start)
 367         data.write(rest.read())
 368         conn.sendall(b64encode(data.getvalue()))
 369         conn.sendall(b'"></audio>\n')
 370 
 371     handle_workaround(conn, 'Wave-Audio Sound', emit_inner_body)
 372     data.close()
 373 
 374 
 375 def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
 376     data = BytesIO()
 377     pre = f'    <img src="data:{mime};base64,'
 378 
 379     def emit_inner_body() -> None:
 380         conn.sendall(bytes(pre, encoding='utf-8'))
 381         data.write(start)
 382         data.write(rest.read())
 383         conn.sendall(b64encode(data.getvalue()))
 384         conn.sendall(b'">\n')
 385 
 386     handle_workaround(conn, 'Bitmap Picture', emit_inner_body)
 387     data.close()
 388 
 389 
 390 def handle_workaround(conn, title: str, handle_inner_body) -> None:
 391     '''
 392     Avoid annoying web-browser behavior when given wave-audio data
 393     and/or bitmap pictures.
 394     '''
 395 
 396     conn.sendall(b'HTTP/1.1 200 OK\r\n')
 397     # auto-detect content-type, and announce it to the client
 398     conn.sendall(b'Content-Type: text/html; charset=UTF-8\r\n')
 399     # discourage web-browsers' download-dialogs and/or auto-downloads
 400     conn.sendall(b'Content-Disposition: inline\r\n')
 401     # tell browser this is the last request
 402     conn.sendall(b'Connection: close\r\n')
 403     # payload starts right after an empty line
 404     conn.sendall(b'\r\n')
 405 
 406     # emit HTML work-around
 407     conn.sendall(b'<!DOCTYPE html>\n')
 408     conn.sendall(b'<html lang="en">\n')
 409     conn.sendall(b'<head>\n')
 410     conn.sendall(b'    <meta charset="UTF-8">\n')
 411     conn.sendall(b'    <link rel="icon" href="data:,">\n')
 412     conn.sendall(b'    <meta name="viewport"')
 413     conn.sendall(b' content="width=device-width, initial-scale=1.0">\n')
 414     conn.sendall(bytes(f'    <title>{title}</title>\n', encoding='utf-8'))
 415     conn.sendall(b'    <style>\n')
 416     conn.sendall(b'        body { margin: auto; }\n')
 417     conn.sendall(b'        ')
 418     conn.sendall(b'audio { display: block; margin: auto; width: 90vw; }\n')
 419     conn.sendall(b'        img { display: block; margin: auto; }\n')
 420     conn.sendall(b'    </style>\n')
 421     conn.sendall(b'</head>\n')
 422     conn.sendall(b'<body>\n')
 423     handle_inner_body()
 424     conn.sendall(b'</body>\n')
 425     conn.sendall(b'</html>\n')
 426 
 427 
 428 def check_mime_table(table: Dict[int, List[Tuple[bytes, str]]]) -> None:
 429     '''Check table used for data-format auto-detection.'''
 430 
 431     for k, v in table.items():
 432         for i, m in enumerate(v):
 433             if len(m[0]) < 1 or m[0][0] != k:
 434                 k = hex(k)
 435                 v = hex(m[0][0])
 436                 msg = f'{k}: wrong first byte ({v}) in entry (index {i})'
 437                 raise ValueError(msg)
 438 
 439 
 440 try:
 441     check_mime_table(hdr_dispatch)
 442 
 443     # opening socket on port 0 randomly picks an available port
 444     sock = socket()
 445     sock.bind(('localhost', 0))
 446     port = sock.getsockname()[1]
 447     sock.settimeout(10.0)
 448     # only handle one thing at a time, since it's a one-off server
 449     sock.listen(1)
 450 
 451     open_new_tab(f'http://localhost:{port}')
 452 
 453     # handle only a single request-response cycle
 454     conn, addr = sock.accept()
 455     show_it(conn, stdin.buffer.read(4096), stdin.buffer)
 456     conn.close()
 457 
 458     sock.close()
 459 except Exception as e:
 460     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 461     exit(1)