#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Callable, Dict, List, Tuple
from webbrowser import open_new_tab


# info is the help message shown by the standard help cmd-line options
info = '''
si [options...]


Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecting the data-format, and using a random port
among those available.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

Dozens of common data-formats are recognized when piped from stdin, such as

- HTML (web pages)
- PDF
- pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
- audio (AAC, MP3, FLAC, WAV, AU, MIDI)
- video (MP4, MOV, WEBM, MKV, AVI)
- JSON
- generic UTF-8 plain-text
'''


# hdr_dispatch groups format-description-groups by their first byte, thus
# shortening total lookups for some data header: notice how the `ftyp` data
# formats aren't handled here, since these can include any byte in parts of
# their first few bytes
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),
        (b'\x00asm', 'application/wasm'),
    ],
    0x1a: [(b'\x1a\x45\xdf\xa3', 'video/webm')],  # matches general MKV format
    0x1f: [(b'\x1f\x8b\x08', 'application/gzip')],
    0x23: [
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],
    0x25: [
        (b'%PDF', 'application/pdf'),
        (b'%!PS', 'application/postscript'),
    ],
    0x2e: [(b'.snd', 'audio/basic')],
    0x47: [
        (b'GIF87a', 'image/gif'),
        (b'GIF89a', 'image/gif'),
    ],
    0x49: [
        # some MP3s start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),
        (b'II*\x00', 'image/tiff'),
    ],
    0x4d: [
        (b'MM\x00*', 'image/tiff'),
        (b'MThd', 'audio/midi'),
    ],
    0x4f: [(b'OggS', 'audio/ogg')],
    0x50: [(b'PK\x03\x04', 'application/zip')],
    0x63: [(b'caff\x00\x01\x00\x00', 'audio/x-caf')],
    0x66: [(b'fLaC', 'audio/x-flac')],
    0x7b: [(b'{\\rtf', 'application/rtf')],
    0x7f: [(b'\x7fELF', 'application/x-elf')],
    0x89: [(b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png')],
    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),
        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}


# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'mp42', 'video/x-m4v'),
    (b'dash', 'audio/aac'),
    (b'isom', 'video/mp4'),
    # (b'isom', 'audio/aac'),
    (b'MSNV', 'video/mp4'),
    (b'qt ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)

# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<html>', 'text/html'),
    (b'<html ', 'text/html'),
    (b'<head>', 'text/html'),
    (b'<body>', 'text/html'),
    (b'<!DOCTYPE html', 'text/html'),
    (b'<svg>', 'image/svg+xml'),
    (b'<svg ', 'image/svg+xml'),
    (b'<?xml', 'application/xml'),
)

# json_heuristics helps func guess_mime auto-detect JSON via regexes:
# it's not perfect, but it seems effective-enough in practice
json_heuristics: Tuple[Pattern, ...] = (
    compile_re(rb'^\s*\{\s*"'),
    compile_re(rb'^\s*\{\s*\['),
    compile_re(rb'^\s*\[\s*"'),
    compile_re(rb'^\s*\[\s*\{'),
    compile_re(rb'^\s*\[\s*\['),
)

# riff_types maps the 4-byte tags at offset 8 of RIFF containers to MIME types
riff_types: Dict[bytes, str] = {
    b'WEBP': 'image/webp',
    b'WAVE': 'audio/x-wav',
    b'AVI ': 'video/avi',
}

# form_types maps the 4-byte tags at offset 8 of FORM containers to MIME types
form_types: Dict[bytes, str] = {
    b'AIFF': 'audio/aiff',
    b'AIFC': 'audio/aiff',
}

# embedded_audio_types are the MIME types served inside an HTML page, since
# web-browsers insist on auto-downloads when given wave or flac audio
embedded_audio_types: Tuple[str, ...] = ('audio/x-wav', 'audio/x-flac')

# header_terminator is the empty line which ends HTTP request headers
header_terminator = b'\r\n\r\n'


def exact_match(header: bytes, maybe: bytes) -> bool:
    '''
    Check if the header given starts with all the bytes of the signature
    given: a header shorter than the signature never matches, since there
    aren't enough bytes to tell.
    '''

    return header.startswith(maybe)


def match_riff(header: bytes) -> str:
    '''
    Handle RIFF containers (WEBP, WAVE, AVI) for func guess_mime: the result
    is the MIME type detected, or an empty string when nothing matches.
    '''

    if len(header) < 12 or not header.startswith(b'RIFF'):
        return ''
    # the 4 bytes at offset 8 declare what the container holds
    return riff_types.get(bytes(header[8:12]), '')


def match_form(header: bytes) -> str:
    '''
    Handle FORM containers (AIFF audio) for func guess_mime: the result
    is the MIME type detected, or an empty string when nothing matches.
    '''

    if len(header) < 12 or not header.startswith(b'FORM'):
        return ''
    # the 4 bytes at offset 8 declare what the container holds
    return form_types.get(bytes(header[8:12]), '')


def match_ftyp(header: bytes) -> str:
    '''
    Handle MPEG-4-like formats for func guess_mime: the result is the MIME
    type detected, or an empty string when nothing matches.
    '''

    # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
    if len(header) < 12 or not header.startswith(b'ftyp', 4):
        return ''

    # next bytes after the ASCII 'ftyp' declare the data-format
    for marker, mime in ftyp_types:
        if header.startswith(marker, 8):
            return mime

    # unrecognized MPEG-4-style data-format
    return ''


def guess_mime(header: bytes, fallback: str) -> str:
    '''
    Try to auto-detect common MIME-types, given the first few input bytes:
    the fallback given is the result when nothing is recognized, which
    includes the case of an empty header.
    '''

    # no bytes, no match
    if not header:
        return fallback

    # check the RIFF formats, AIFF audio, and MPEG-4-like formats
    for matcher in (match_riff, match_form, match_ftyp):
        mime = matcher(header)
        if mime:
            return mime

    # maybe it's a bitmap picture, which almost always has 40 on 15th byte;
    # check that exact byte, as searching for it would wrongly give up when
    # the same value also happens to show up in the few bytes before it
    if header.startswith(b'BM') and header[14:15] == b'\x28':
        return 'image/x-bmp'

    # check general lookup-table
    for signature, mime in hdr_dispatch.get(header[0], ()):
        if exact_match(header, signature):
            return mime

    if header.find(b'<!DOCTYPE html', 0, 64) >= 0:
        return 'text/html'

    # try HTML, SVG, and even XML
    if header.find(b'<', 0, 8) >= 0:
        for marker, mime in xmlish_heuristics:
            if header.find(marker, 0, 64) >= 0:
                return mime

    # try some common cases for JSON
    for pattern in json_heuristics:
        if pattern.match(header):
            return 'application/json'

    # nothing matched
    return fallback


def show_it(conn, start: bytes, rest) -> None:
    '''
    Handle both normal input and data-URIs: `start` has the first few input
    bytes, while `rest` is a binary reader for all bytes after those.
    '''

    # handle base64-encoded data-URIs
    if start.startswith(b'data:'):
        i = start.find(b';base64,', 0, 64)
        if i != -1:
            mime_type = str(start[len(b'data:'):i], encoding='utf-8')
            # the declared type ends up in a response header: never let it
            # smuggle extra header lines, and guess from the payload instead
            if '\r' in mime_type or '\n' in mime_type:
                mime_type = ''

            encoded = start[i + len(b';base64,'):] + rest.read()
            inp = BytesIO(b64decode(encoded))

            if mime_type == '':
                start = inp.read(4096)
                mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
                show_it_as(conn, start, inp, mime_type)
            else:
                show_it_as(conn, bytes(), inp, mime_type)
            return

    mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
    return show_it_as(conn, start, rest, mime_type)


def _skip_request_headers(conn) -> None:
    'Read-ignore all client headers, up to the empty line which ends them.'

    tail = b''
    while True:
        chunk = conn.recv(1024)
        if not chunk:
            # client closed its side early: waiting for more is pointless
            return
        # the terminator may be split across chunks, so keep the last few
        # bytes from previous chunks around when looking for it
        window = tail + chunk
        if header_terminator in window:
            return
        tail = window[-(len(header_terminator) - 1):]


def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
    '''
    This is where the web-serving action happens: the whole payload is
    `start` followed by everything read from `rest`, and is sent as a
    single HTTP response announcing the MIME type given.
    '''

    _skip_request_headers(conn)

    # web-browsers insist on auto-downloads when given wave or flac audio
    if mime_type in embedded_audio_types:
        handle_sound_workaround(conn, mime_type, start, rest)
        return

    # web-browsers insist on auto-downloads when given bitmap pictures
    if mime_type == 'image/x-bmp':
        handle_image_workaround(conn, mime_type, start, rest)
        return

    # handle all other data formats
    o = conn.sendall

    o(b'HTTP/1.1 200 OK\r\n')
    o(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the only/last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # send all input bytes
    o(start)
    conn.sendfile(rest)


def _emit_data_uri_tag(conn, opening: str, closing: bytes, start: bytes,
                       rest) -> None:
    'Send an HTML tag whose base64 data-URI has all the input bytes.'

    conn.sendall(bytes(opening, encoding='utf-8'))
    conn.sendall(b64encode(start + rest.read()))
    conn.sendall(closing)


def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Serve sounds embedded in a web page, instead of as direct payloads.'

    opening = f' <audio controls autofocus src="data:{mime};base64,'

    def emit_inner_body() -> None:
        _emit_data_uri_tag(conn, opening, b'"></audio>\n', start, rest)

    handle_workaround(conn, 'Wave-Audio Sound', emit_inner_body)


def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Serve pictures embedded in a web page, instead of as direct payloads.'

    opening = f' <img src="data:{mime};base64,'

    def emit_inner_body() -> None:
        _emit_data_uri_tag(conn, opening, b'">\n', start, rest)

    handle_workaround(conn, 'Bitmap Picture', emit_inner_body)


def handle_workaround(conn, title: str, handle_inner_body: Callable) -> None:
    '''
    Avoid annoying auto-download web-browser behavior, by sending a whole
    web page instead: the callback given is called with no arguments, and
    is expected to send what goes inside the body of the page.
    '''

    o = conn.sendall

    o(b'HTTP/1.1 200 OK\r\n')
    # the response is always a web page wrapping the actual payload
    o(b'Content-Type: text/html; charset=UTF-8\r\n')
    # discourage web-browsers' download-dialogs and/or auto-downloads
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # emit HTML work-around
    o(b'<!DOCTYPE html>\n')
    o(b'<html lang="en">\n')
    o(b'<head>\n')
    o(b' <meta charset="UTF-8">\n')
    o(b' <link rel="icon" href="data:,">\n')
    o(b' <meta name="viewport"')
    o(b' content="width=device-width, initial-scale=1.0">\n')
    o(bytes(f' <title>{title}</title>\n', encoding='utf-8'))
    o(b' <style>\n')
    o(b' body { margin: auto; }\n')
    o(b' audio { display: block; margin: auto; width: 90vw; }\n')
    o(b' img { display: block; margin: auto; }\n')
    o(b' </style>\n')
    o(b'</head>\n')
    o(b'<body>\n')
    handle_inner_body()
    o(b'</body>\n')
    o(b'</html>\n')


def main() -> None:
    '''
    Run the app: show the help message when asked to, or serve all bytes
    from standard input to a newly-opened web-browser tab, exactly once.
    '''

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)

    try:
        # the `with` blocks ensure both sockets are closed, even on errors
        with socket() as sock:
            # opening socket on port 0 randomly picks an available port
            sock.bind(('localhost', 0))
            port = sock.getsockname()[1]
            sock.settimeout(10.0)
            # only handle one thing at a time, since it's a one-off server
            sock.listen(1)

            open_new_tab(f'http://localhost:{port}')

            # handle only a single request-response cycle
            conn, _ = sock.accept()
            with conn:
                show_it(conn, stdin.buffer.read(4096), stdin.buffer)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()