File: si.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from base64 import b64decode, b64encode
  27 from io import BytesIO
  28 from re import compile as compile_re, Pattern
  29 from socket import socket
  30 from sys import argv, exit, stderr, stdin
  31 from typing import Callable, Dict, List, Tuple
  32 from webbrowser import open_new_tab
  33 
  34 
  35 info = '''
  36 si [options...]
  37 
  38 
  39 Show It shows data read from standard-input, using your default web browser
  40 by auto-opening tabs, auto-detecing the data-format, and using a random port
  41 among those available.
  42 
  43 The localhost connection is available only until all data are transferred:
  44 this means refreshing your browser tab will lose your content, replacing it
  45 with a server-not-found message page.
  46 
  47 Dozens of common data-formats are recognized when piped from stdin, such as
  48 
  49   - HTML (web pages)
  50   - PDF
  51   - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
  52   - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
  53   - video (MP4, MOV, WEBM, MKV, AVI)
  54   - JSON
  55   - generic UTF-8 plain-text
  56 '''
  57 
  58 # handle standard help cmd-line options, quitting right away in that case
  59 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  60     print(info.strip())
  61     exit(0)
  62 
  63 
# hdr_dispatch maps the first byte of the input to the (signature, MIME-type)
# pairs whose signature starts with that byte, so func guess_mime only has to
# check the few signatures which can possibly match: pairs in each group are
# tried in order, and the first signature fully matching the start of the
# data wins.
#
# Notice how the `ftyp` data formats aren't handled here, since these can
# include any byte in parts of their first few bytes: func match_ftyp checks
# those instead.
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'), # .cur files
        (b'\x00asm', 'application/wasm'),
    ],
    0x1a: [(b'\x1a\x45\xdf\xa3', 'video/webm')], # matches general MKV format
    0x1f: [(b'\x1f\x8b\x08', 'application/gzip')],
    0x23: [
        # data starting like a shebang line are shown as plain-text
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],
    0x25: [
        (b'%PDF', 'application/pdf'),
        (b'%!PS', 'application/postscript'),
    ],
    0x28: [(b'\x28\xb5\x2f\xfd', 'application/zstd')],
    0x2e: [(b'.snd', 'audio/basic')],
    0x47: [
        (b'GIF87a', 'image/gif'),
        (b'GIF89a', 'image/gif'),
    ],
    0x49: [
        # some MP3s start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),
        (b'II*\x00', 'image/tiff'),
    ],
    0x4d: [
        (b'MM\x00*', 'image/tiff'),
        (b'MThd', 'audio/midi'),
    ],
    0x4f: [(b'OggS', 'audio/ogg')],
    0x50: [(b'PK\x03\x04', 'application/zip')],
    0x53: [(b'SQLite format 3\x00', 'application/x-sqlite3')],
    0x63: [(b'caff\x00\x01\x00\x00', 'audio/x-caf')],
    0x66: [(b'fLaC', 'audio/x-flac')],
    0x7b: [(b'{\\rtf', 'application/rtf')],
    0x7f: [(b'\x7fELF', 'application/x-elf')],
    0x89: [(b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png')],
    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),
        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}
 118 
 119 
# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats: each pair
# has the 4-byte marker expected right after the ASCII `ftyp` tag, along with
# the MIME-type to use for it; pairs are checked in order, so the first
# matching marker wins
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'mp42', 'video/x-m4v'),
    (b'dash', 'audio/aac'),
    (b'isom', 'video/mp4'),
    # NOTE(review): presumably `isom` can also mark audio-only data; the
    # alternative below is kept disabled, so such data are handled as video
    # (b'isom', 'audio/aac'),
    (b'MSNV', 'video/mp4'),
    (b'qt  ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)
 133 
# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML:
# these are only tried when a `<` shows up among the first 8 bytes, and each
# marker is then looked for anywhere among the first 64 bytes, in the order
# given here; the generic XML markers come last, so data starting with an
# XML declaration still count as HTML/SVG when those tags show up early
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<html>', 'text/html'),
    (b'<html ', 'text/html'),
    (b'<head>', 'text/html'),
    (b'<head ', 'text/html'),
    (b'<body>', 'text/html'),
    (b'<body ', 'text/html'),
    (b'<!DOCTYPE html>', 'text/html'),
    (b'<!DOCTYPE html ', 'text/html'),
    (b'<svg>', 'image/svg+xml'),
    (b'<svg ', 'image/svg+xml'),
    (b'<?xml>', 'application/xml'),
    (b'<?xml ', 'application/xml'),
)
 149 
 150 # json_heuristics helps func guess_mime auto-detect JSON via regexes:
 151 # it's not perfect, but it seems effective-enough in practice
 152 json_heuristics: Tuple[Pattern] = (
 153     compile_re(b'''^\\s*\\{\\s*"'''),
 154     compile_re(b'''^\\s*\\{\\s*\\['''),
 155     compile_re(b'''^\\s*\\[\\s*"'''),
 156     compile_re(b'''^\\s*\\[\\s*\\{'''),
 157     compile_re(b'''^\\s*\\[\\s*\\['''),
 158 )
 159 
 160 
 161 def exact_match(header: bytes, maybe: bytes) -> bool:
 162     if len(header) < len(maybe):
 163         # not enough bytes to tell if input data match
 164         return False
 165     return all(x == y for x, y in zip(header, maybe))
 166 
 167 
 168 def match_riff(header: bytes) -> str:
 169     'Handle a few special cases for func guess_mime.'
 170 
 171     if len(header) < 12 or not header.startswith(b'RIFF'):
 172         return ''
 173 
 174     if header.find(b'WEBP', 8, 12) == 8:
 175         return 'image/webp'
 176     if header.find(b'WAVE', 8, 12) == 8:
 177         return 'audio/x-wav'
 178     if header.find(b'AVI ', 8, 12) == 8:
 179         return 'video/avi'
 180     return ''
 181 
 182 
 183 def match_form(header: bytes) -> str:
 184     'Handle a few special cases for func guess_mime.'
 185 
 186     if len(header) < 12 or not header.startswith(b'FORM'):
 187         return ''
 188 
 189     if header.find(b'AIFF', 8, 12) == 8:
 190         return 'audio/aiff'
 191     if header.find(b'AIFC', 8, 12) == 8:
 192         return 'audio/aiff'
 193     return ''
 194 
 195 
 196 def match_ftyp(header: bytes) -> str:
 197     'Handle a few special cases for func guess_mime.'
 198 
 199     # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
 200     if len(header) < 12 or header.find(b'ftyp', 4, 8) != 4:
 201         return ''
 202 
 203     # next 4 bytes after the ASCII 'ftyp' declare the data-format
 204     for marker, mime in ftyp_types:
 205         if header.find(marker, 8, 12) == 8:
 206             return mime
 207 
 208     # unrecognized MPEG-4-style data-format
 209     return ''
 210 
 211 
 212 def guess_mime(header: bytes, fallback: str) -> str:
 213     'Try to auto-detect common MIME-types, given the first few input bytes.'
 214 
 215     # no bytes, no match
 216     if len(header) == 0:
 217         return fallback
 218 
 219     # check the MPEG-4-like formats, the RIFF formats, and AIFF audio
 220     for f in (match_ftyp, match_riff, match_form):
 221         m = f(header)
 222         if m != '':
 223             return m
 224 
 225     # maybe it's a bitmap picture, which almost always has 40 on 15th byte
 226     if header.startswith(b'BM') and header.find(b'\x28', 8, 16) == 14:
 227         return 'image/x-bmp'
 228 
 229     # check general lookup-table
 230     if header[0] in hdr_dispatch:
 231         for maybe in hdr_dispatch[header[0]]:
 232             if exact_match(header, maybe[0]):
 233                 return maybe[1]
 234 
 235     # try HTML, SVG, and even generic XML
 236     if header.find(b'<', 0, 8) >= 0:
 237         for marker, mime in xmlish_heuristics:
 238             if header.find(marker, 0, 64) >= 0:
 239                 return mime
 240 
 241     # try some common cases for JSON
 242     for pattern in json_heuristics:
 243         if pattern.match(header):
 244             return 'application/json'
 245 
 246     # nothing matched
 247     return fallback
 248 
 249 
 250 def show_it(conn, start: bytes, rest) -> None:
 251     'Handle both normal input and data-URIs.'
 252 
 253     # handle base64-encoded data-URIs
 254     if start.startswith(b'data:'):
 255         i = start.find(b';base64,', 0, 64)
 256         if i > 0:
 257             mime_type = str(start[len('data:'):i], encoding='utf-8')
 258             encoded = BytesIO()
 259             encoded.write(start[i + len(';base64,'):])
 260             encoded.write(rest.read())
 261             decoded = b64decode(encoded.getvalue())
 262             encoded.close()
 263 
 264             inp = BytesIO(decoded)
 265             if mime_type == '':
 266                 start = inp.read(4096)
 267                 mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
 268                 show_it_as(conn, start, inp, mime_type)
 269             else:
 270                 show_it_as(conn, bytes(), inp, mime_type)
 271             return
 272 
 273     mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
 274     return show_it_as(conn, start, rest, mime_type)
 275 
 276 
 277 def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
 278     'This is where the web-serving action happens.'
 279 
 280     # read-ignore all client headers
 281     while True:
 282         if conn.recv(1024).endswith(b'\r\n\r\n'):
 283             break
 284 
 285     # web-browsers insist on auto-downloads when given wave or flac audio
 286     for e in ('audio/x-wav', 'audio/x-flac'):
 287         if e == mime_type:
 288             handle_sound_workaround(conn, mime_type, start, rest)
 289             return
 290 
 291     # web-browsers insist on auto-downloads when given bitmap pictures
 292     if mime_type == 'image/x-bmp':
 293         handle_image_workaround(conn, mime_type, start, rest)
 294         return
 295 
 296     # handle all other data formats
 297     o = conn.sendall
 298 
 299     o(b'HTTP/1.1 200 OK\r\n')
 300     o(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
 301     o(b'Content-Disposition: inline\r\n')
 302     # tell browser this is the only/last request
 303     o(b'Connection: close\r\n')
 304     # payload starts right after an empty line
 305     o(b'\r\n')
 306 
 307     # send all input bytes
 308     o(start)
 309     conn.sendfile(rest)
 310 
 311 
 312 def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
 313     data = BytesIO()
 314     pre = f'    <audio controls autofocus src="data:{mime};base64,'
 315 
 316     def emit_inner_body() -> None:
 317         conn.sendall(bytes(pre, encoding='utf-8'))
 318         data.write(start)
 319         data.write(rest.read())
 320         conn.sendall(b64encode(data.getvalue()))
 321         conn.sendall(b'"></audio>\n')
 322 
 323     handle_workaround(conn, 'Wave-Audio Sound', emit_inner_body)
 324     data.close()
 325 
 326 
 327 def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
 328     data = BytesIO()
 329     pre = f'    <img src="data:{mime};base64,'
 330 
 331     def emit_inner_body() -> None:
 332         conn.sendall(bytes(pre, encoding='utf-8'))
 333         data.write(start)
 334         data.write(rest.read())
 335         conn.sendall(b64encode(data.getvalue()))
 336         conn.sendall(b'">\n')
 337 
 338     handle_workaround(conn, 'Bitmap Picture', emit_inner_body)
 339     data.close()
 340 
 341 
 342 def handle_workaround(conn, title: str, handle_inner_body: Callable) -> None:
 343     'Avoid annoying auto-download web-browser behavior.'
 344 
 345     o = conn.sendall
 346 
 347     o(b'HTTP/1.1 200 OK\r\n')
 348     # auto-detect content-type, and announce it to the client
 349     o(b'Content-Type: text/html; charset=UTF-8\r\n')
 350     # discourage web-browsers' download-dialogs and/or auto-downloads
 351     o(b'Content-Disposition: inline\r\n')
 352     # tell browser this is the last request
 353     o(b'Connection: close\r\n')
 354     # payload starts right after an empty line
 355     o(b'\r\n')
 356 
 357     # emit HTML work-around
 358     o(b'<!DOCTYPE html>\n')
 359     o(b'<html lang="en">\n')
 360     o(b'<head>\n')
 361     o(b'    <meta charset="UTF-8">\n')
 362     o(b'    <link rel="icon" href="data:,">\n')
 363     o(b'    <meta name="viewport"')
 364     o(b' content="width=device-width, initial-scale=1.0">\n')
 365     o(bytes(f'    <title>{title}</title>\n', encoding='utf-8'))
 366     o(b'    <style>\n')
 367     o(b'        body { margin: auto; }\n')
 368     o(b'        audio { display: block; margin: auto; width: 90vw; }\n')
 369     o(b'        img { display: block; margin: auto; }\n')
 370     o(b'    </style>\n')
 371     o(b'</head>\n')
 372     o(b'<body>\n')
 373     handle_inner_body()
 374     o(b'</body>\n')
 375     o(b'</html>\n')
 376 
 377 
 378 try:
 379     # opening socket on port 0 randomly picks an available port
 380     sock = socket()
 381     sock.bind(('localhost', 0))
 382     port = sock.getsockname()[1]
 383     sock.settimeout(10.0)
 384     # only handle one thing at a time, since it's a one-off server
 385     sock.listen(1)
 386 
 387     open_new_tab(f'http://localhost:{port}')
 388 
 389     # handle only a single request-response cycle
 390     conn, addr = sock.accept()
 391     show_it(conn, stdin.buffer.read(4096), stdin.buffer)
 392     conn.close()
 393 
 394     sock.close()
 395 except Exception as e:
 396     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 397     exit(1)