#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Callable, Dict, List, Tuple
from webbrowser import open_new_tab


info = '''
si [options...]


Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecing the data-format, and using a random port
among those available.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

Dozens of common data-formats are recognized when piped from stdin, such as

- HTML (web pages)
- PDF
- pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
- audio (AAC, MP3, FLAC, WAV, AU, MIDI)
- video (MP4, MOV, WEBM, MKV, AVI)
- JSON
- generic UTF-8 plain-text
'''


# hdr_dispatch groups format-description-groups by their first byte, thus
# shortening total lookups for some data header: notice how the `ftyp` data
# formats aren't handled here, since these can include any byte in parts of
# their first few bytes
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),  # .cur files
        (b'\x00asm', 'application/wasm'),
    ],
    0x1a: [(b'\x1a\x45\xdf\xa3', 'video/webm')],  # matches general MKV format
    0x1f: [(b'\x1f\x8b\x08', 'application/gzip')],
    0x23: [
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],
    0x25: [
        (b'%PDF', 'application/pdf'),
        (b'%!PS', 'application/postscript'),
    ],
    0x28: [(b'\x28\xb5\x2f\xfd', 'application/zstd')],
    0x2e: [(b'.snd', 'audio/basic')],
    0x47: [
        (b'GIF87a', 'image/gif'),
        (b'GIF89a', 'image/gif'),
    ],
    0x49: [
        # some MP3s start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),
        (b'II*\x00', 'image/tiff'),
    ],
    0x4d: [
        (b'MM\x00*', 'image/tiff'),
        (b'MThd', 'audio/midi'),
    ],
    0x4f: [(b'OggS', 'audio/ogg')],
    0x50: [(b'PK\x03\x04', 'application/zip')],
    0x53: [(b'SQLite format 3\x00', 'application/x-sqlite3')],
    0x63: [(b'caff\x00\x01\x00\x00', 'audio/x-caf')],
    0x66: [(b'fLaC', 'audio/x-flac')],
    0x7b: [(b'{\\rtf', 'application/rtf')],
    0x7f: [(b'\x7fELF', 'application/x-elf')],
    0x89: [(b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png')],
    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),
        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}


# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats: each
# marker is compared as a prefix of the 4-byte brand which follows `ftyp`
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'mp42', 'video/x-m4v'),
    (b'dash', 'audio/aac'),
    # NOTE: the `isom` brand is ambiguous; video/mp4 is the chosen guess
    (b'isom', 'video/mp4'),
    (b'MSNV', 'video/mp4'),
    (b'qt ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)

# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<html>', 'text/html'),
    (b'<html ', 'text/html'),
    (b'<head>', 'text/html'),
    (b'<head ', 'text/html'),
    (b'<body>', 'text/html'),
    (b'<body ', 'text/html'),
    (b'<!DOCTYPE html>', 'text/html'),
    (b'<!DOCTYPE html ', 'text/html'),
    (b'<svg>', 'image/svg+xml'),
    (b'<svg ', 'image/svg+xml'),
    (b'<?xml>', 'application/xml'),
    (b'<?xml ', 'application/xml'),
)

# json_heuristics helps func guess_mime auto-detect JSON via regexes:
# it's not perfect, but it seems effective-enough in practice
json_heuristics: Tuple[Pattern, ...] = (
    compile_re(rb'^\s*\{\s*"'),
    compile_re(rb'^\s*\{\s*\['),
    compile_re(rb'^\s*\[\s*"'),
    compile_re(rb'^\s*\[\s*\{'),
    compile_re(rb'^\s*\[\s*\['),
)

# _riff_kinds maps the 4-byte form-type at offset 8 of RIFF data to a MIME type
_riff_kinds: Dict[bytes, str] = {
    b'WEBP': 'image/webp',
    b'WAVE': 'audio/x-wav',
    b'AVI ': 'video/avi',
}

# _form_kinds maps the 4-byte form-type at offset 8 of FORM data to a MIME type
_form_kinds: Dict[bytes, str] = {
    b'AIFF': 'audio/aiff',
    b'AIFC': 'audio/aiff',
}

# _embedded_types are the MIME types sent inside an HTML page, instead of
# directly: values are the page title, and the HTML right before/after the
# base64 payload
_embedded_types: Dict[str, Tuple[str, str, bytes]] = {
    'audio/x-wav': ('Wave-Audio Sound', 'audio', b'"></audio>\n'),
    'audio/x-flac': ('Wave-Audio Sound', 'audio', b'"></audio>\n'),
    'image/x-bmp': ('Bitmap Picture', 'img', b'">\n'),
}

# _copy_chunk_size is how many payload bytes are read/sent at a time
_copy_chunk_size = 64 * 1024


def exact_match(header: bytes, maybe: bytes) -> bool:
    '''
    Tell whether `header` begins with all the bytes in `maybe`: headers
    shorter than `maybe` never match, since there aren't enough bytes to
    tell.
    '''

    return header.startswith(maybe)


def match_riff(header: bytes) -> str:
    '''
    Handle RIFF containers (WEBP, WAVE, AVI) for func guess_mime, returning
    the MIME type matched, or an empty string when nothing matches.
    '''

    if len(header) < 12 or not header.startswith(b'RIFF'):
        return ''
    # bytes 4..7 hold a size, so the form-type is checked at offset 8
    return _riff_kinds.get(bytes(header[8:12]), '')


def match_form(header: bytes) -> str:
    '''
    Handle FORM containers (AIFF, AIFC) for func guess_mime, returning the
    MIME type matched, or an empty string when nothing matches.
    '''

    if len(header) < 12 or not header.startswith(b'FORM'):
        return ''
    return _form_kinds.get(bytes(header[8:12]), '')


def match_ftyp(header: bytes) -> str:
    '''
    Handle MPEG-4-like formats for func guess_mime, returning the MIME type
    matched, or an empty string when nothing matches.
    '''

    # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
    if len(header) < 12 or not header.startswith(b'ftyp', 4, 8):
        return ''

    # next 4 bytes after the ASCII 'ftyp' declare the data-format: markers
    # are matched as prefixes confined to those 4 bytes
    for marker, mime in ftyp_types:
        if header.startswith(marker, 8, 12):
            return mime

    # unrecognized MPEG-4-style data-format
    return ''


def guess_mime(header: bytes, fallback: str) -> str:
    '''
    Try to auto-detect common MIME-types, given the first few input bytes.

    header: the leading bytes of the data to identify
    fallback: the value to return when no known format is recognized

    Checks run in a fixed order: container formats first, then bitmaps, the
    first-byte lookup table, XML-like markers, and finally JSON-like starts.
    '''

    # no bytes, no match
    if not header:
        return fallback

    # check the MPEG-4-like formats, the RIFF formats, and AIFF audio
    for matcher in (match_ftyp, match_riff, match_form):
        found = matcher(header)
        if found != '':
            return found

    # maybe it's a bitmap picture, which almost always has 40 on 15th byte:
    # that byte is checked directly, so other bytes with the same value
    # right before it can't hide the match
    if header.startswith(b'BM') and header[14:15] == b'\x28':
        return 'image/x-bmp'

    # check general lookup-table
    for magic, mime in hdr_dispatch.get(header[0], ()):
        if exact_match(header, magic):
            return mime

    # try HTML, SVG, and even generic XML: a `<` must show up among the
    # first 8 bytes, and a whole marker among the first 64
    if b'<' in header[:8]:
        window = header[:64]
        for marker, mime in xmlish_heuristics:
            if marker in window:
                return mime

    # try some common cases for JSON
    if any(pattern.match(header) for pattern in json_heuristics):
        return 'application/json'

    # nothing matched
    return fallback


def show_it(conn, start: bytes, rest) -> None:
    '''
    Handle both normal input and data-URIs.

    conn: the connected socket to respond on
    start: the first chunk of input bytes, used for format detection
    rest: a binary file-like object with all input bytes after `start`

    Input starting with a base64 data-URI is decoded first, and served using
    the MIME type it declares; everything else is served as is, using the
    auto-detected MIME type, with UTF-8 plain-text as the fallback.
    '''

    fallback = 'text/plain; charset=UTF-8'
    prefix, sep = b'data:', b';base64,'

    # handle base64-encoded data-URIs
    if start.startswith(prefix):
        i = start.find(sep, 0, 64)
        if i > 0:
            declared = str(start[len(prefix):i], encoding='utf-8')
            # the declared type ends up in a response header, so line-breaks
            # in it are never trusted: auto-detection is used instead
            if '\r' in declared or '\n' in declared:
                declared = ''

            inp = BytesIO(b64decode(start[i + len(sep):] + rest.read()))
            if declared == '':
                head = inp.read(4096)
                show_it_as(conn, head, inp, guess_mime(head, fallback))
            else:
                show_it_as(conn, bytes(), inp, declared)
            return

    show_it_as(conn, start, rest, guess_mime(start, fallback))


def _skip_request_headers(conn) -> None:
    'Read-ignore all client headers, stopping early if the client hangs up.'

    end = b'\r\n\r\n'
    carry = b''
    while True:
        chunk = conn.recv(1024)
        # no bytes means the client is done sending: waiting for more would
        # loop forever
        if not chunk:
            return
        # the empty line ending the headers may be split across chunks
        if end in carry + chunk:
            return
        carry = (carry + chunk)[-(len(end) - 1):]


def _send_embedded(conn, mime: str, start: bytes, rest) -> None:
    'Send all input as a base64 data-URI inside a minimal HTML page.'

    title, tag, closing = _embedded_types[mime]
    if tag == 'audio':
        pre = f' <audio controls autofocus src="data:{mime};base64,'
    else:
        pre = f' <img src="data:{mime};base64,'

    def emit_inner_body() -> None:
        conn.sendall(bytes(pre, encoding='utf-8'))
        conn.sendall(b64encode(start + rest.read()))
        conn.sendall(closing)

    handle_workaround(conn, title, emit_inner_body)


def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
    '''
    This is where the web-serving action happens.

    conn: the connected socket to respond on
    start: input bytes already read, which are sent first
    rest: a binary file-like object with all remaining input bytes
    mime_type: the value for the Content-Type response header
    '''

    _skip_request_headers(conn)

    # web-browsers insist on auto-downloads when given wave or flac audio
    if mime_type in ('audio/x-wav', 'audio/x-flac'):
        handle_sound_workaround(conn, mime_type, start, rest)
        return

    # web-browsers insist on auto-downloads when given bitmap pictures
    if mime_type == 'image/x-bmp':
        handle_image_workaround(conn, mime_type, start, rest)
        return

    # handle all other data formats
    o = conn.sendall

    o(b'HTTP/1.1 200 OK\r\n')
    o(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the only/last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # send all input bytes: a plain read-loop is used instead of the
    # socket's sendfile method, which is documented for regular files,
    # while `rest` can be a pipe or an in-memory buffer; reading also
    # resumes right after the bytes already taken for `start`
    o(start)
    while True:
        chunk = rest.read(_copy_chunk_size)
        if not chunk:
            break
        o(chunk)


def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Send audio as an HTML page with an audio player, to avoid downloads.'

    pre = f' <audio controls autofocus src="data:{mime};base64,'

    def emit_inner_body() -> None:
        conn.sendall(bytes(pre, encoding='utf-8'))
        conn.sendall(b64encode(start + rest.read()))
        conn.sendall(b'"></audio>\n')

    handle_workaround(conn, 'Wave-Audio Sound', emit_inner_body)


def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Send a picture as an HTML page with an image tag, to avoid downloads.'

    pre = f' <img src="data:{mime};base64,'

    def emit_inner_body() -> None:
        conn.sendall(bytes(pre, encoding='utf-8'))
        conn.sendall(b64encode(start + rest.read()))
        conn.sendall(b'">\n')

    handle_workaround(conn, 'Bitmap Picture', emit_inner_body)


def handle_workaround(conn, title: str, handle_inner_body: Callable) -> None:
    '''
    Avoid annoying auto-download web-browser behavior.

    conn: the connected socket to respond on
    title: the text for the title of the HTML page sent
    handle_inner_body: called with no arguments to send what goes inside
        the body of the HTML page
    '''

    o = conn.sendall

    o(b'HTTP/1.1 200 OK\r\n')
    # the response is always a web page, whatever the input type is
    o(b'Content-Type: text/html; charset=UTF-8\r\n')
    # discourage web-browsers' download-dialogs and/or auto-downloads
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # emit HTML work-around
    o(b'<!DOCTYPE html>\n')
    o(b'<html lang="en">\n')
    o(b'<head>\n')
    o(b' <meta charset="UTF-8">\n')
    o(b' <link rel="icon" href="data:,">\n')
    o(b' <meta name="viewport"')
    o(b' content="width=device-width, initial-scale=1.0">\n')
    o(bytes(f' <title>{title}</title>\n', encoding='utf-8'))
    o(b' <style>\n')
    o(b' body { margin: auto; }\n')
    o(b' audio { display: block; margin: auto; width: 90vw; }\n')
    o(b' img { display: block; margin: auto; }\n')
    o(b' </style>\n')
    o(b'</head>\n')
    o(b'<body>\n')
    handle_inner_body()
    o(b'</body>\n')
    o(b'</html>\n')


def main() -> None:
    '''
    Serve standard input once to the default web browser, then quit.

    Quits with exit code 1 after showing an error message when anything
    fails, such as no browser connecting within 10 seconds.
    '''

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)

    try:
        # both sockets are closed even when something fails along the way
        with socket() as sock:
            # opening socket on port 0 randomly picks an available port
            sock.bind(('localhost', 0))
            port = sock.getsockname()[1]
            sock.settimeout(10.0)
            # only handle one thing at a time, since it's a one-off server
            sock.listen(1)

            open_new_tab(f'http://localhost:{port}')

            # handle only a single request-response cycle
            conn, _ = sock.accept()
            with conn:
                show_it(conn, stdin.buffer.read(4096), stdin.buffer)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()