#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Callable, Dict, List, Tuple
from webbrowser import open_new_tab


info = '''
si [options...]


Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecting the data-format, and using a random port
among those available.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

Dozens of common data-formats are recognized when piped from stdin, such as

    - HTML (web pages)
    - PDF
    - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
    - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
    - video (MP4, MOV, WEBM, MKV, AVI)
    - JSON
    - generic UTF-8 plain-text
'''


# hdr_dispatch groups format-description-groups by their first byte, thus
# shortening total lookups for some data header: notice how the `ftyp` data
# formats aren't handled here, since these can include any byte in parts of
# their first few bytes
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),  # .cur files
        (b'\x00asm', 'application/wasm'),
    ],
    0x1a: [(b'\x1a\x45\xdf\xa3', 'video/webm')],  # matches general MKV format
    0x1f: [(b'\x1f\x8b\x08', 'application/gzip')],
    0x23: [
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],
    0x25: [(b'%PDF', 'application/pdf'), (b'%!PS', 'application/postscript')],
    0x28: [(b'\x28\xb5\x2f\xfd', 'application/zstd')],
    0x2e: [(b'.snd', 'audio/basic')],
    0x47: [(b'GIF87a', 'image/gif'), (b'GIF89a', 'image/gif')],
    0x49: [
        # some MP3s start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),
        (b'II*\x00', 'image/tiff'),
    ],
    0x4d: [(b'MM\x00*', 'image/tiff'), (b'MThd', 'audio/midi')],
    0x4f: [(b'OggS', 'audio/ogg')],
    0x50: [(b'PK\x03\x04', 'application/zip')],
    0x53: [(b'SQLite format 3\x00', 'application/x-sqlite3')],
    0x63: [(b'caff\x00\x01\x00\x00', 'audio/x-caf')],
    0x66: [(b'fLaC', 'audio/x-flac')],
    0x7b: [(b'{\\rtf', 'application/rtf')],
    0x7f: [(b'\x7fELF', 'application/x-elf')],
    0x89: [(b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png')],
    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),
        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}


# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats: each
# marker is the 4-byte brand which follows the ASCII `ftyp` tag
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'mp42', 'video/x-m4v'),
    (b'dash', 'audio/aac'),
    (b'isom', 'video/mp4'),
    # (b'isom', 'audio/aac'),
    (b'MSNV', 'video/mp4'),
    (b'qt  ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)

# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<html>', 'text/html'), (b'<html ', 'text/html'),
    (b'<head>', 'text/html'), (b'<head ', 'text/html'),
    (b'<body>', 'text/html'), (b'<body ', 'text/html'),
    (b'<!DOCTYPE html>', 'text/html'), (b'<!DOCTYPE html ', 'text/html'),
    (b'<svg>', 'image/svg+xml'), (b'<svg ', 'image/svg+xml'),
    (b'<?xml>', 'application/xml'), (b'<?xml ', 'application/xml'),
)

# json_heuristics helps func guess_mime auto-detect JSON via regexes:
# it's not perfect, but it seems effective-enough in practice
json_heuristics: Tuple[Pattern, ...] = (
    compile_re(rb'^\s*\{\s*"'),
    compile_re(rb'^\s*\{\s*\['),
    compile_re(rb'^\s*\[\s*"'),
    compile_re(rb'^\s*\[\s*\{'),
    compile_re(rb'^\s*\[\s*\['),
)


def exact_match(header: bytes, maybe: bytes) -> bool:
    """Return True when `header` begins with all the bytes in `maybe`."""
    return header.startswith(maybe)


def match_riff(header: bytes) -> str:
    """Return the MIME type of a RIFF container, or '' when not recognized.

    RIFF data start with ASCII `RIFF`, a 4-byte size, then a 4-byte format
    tag, which is what tells the supported formats apart.
    """
    if len(header) < 12 or not header.startswith(b'RIFF'):
        return ''

    kinds = {
        b'WEBP': 'image/webp',
        b'WAVE': 'audio/x-wav',
        b'AVI ': 'video/avi',
    }
    return kinds.get(header[8:12], '')


def match_form(header: bytes) -> str:
    """Return the MIME type of an IFF `FORM` container, or '' otherwise.

    Only the AIFF and AIFC audio variants are recognized.
    """
    if len(header) < 12 or not header.startswith(b'FORM'):
        return ''

    if header[8:12] in (b'AIFF', b'AIFC'):
        return 'audio/aiff'
    return ''


def match_ftyp(header: bytes) -> str:
    """Return the MIME type of an MPEG-4-like header, or '' otherwise."""
    # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
    if len(header) < 12 or header[4:8] != b'ftyp':
        return ''

    # next 4 bytes after the ASCII 'ftyp' declare the data-format
    for marker, mime in ftyp_types:
        if header.startswith(marker, 8):
            return mime

    return ''


def guess_mime(header: bytes, fallback: str) -> str:
    """Guess the MIME type of data from its first bytes.

    Args:
        header: the leading bytes of the data; it can be empty.
        fallback: the value to return when nothing is recognized.

    Returns:
        The MIME type detected, or `fallback` when no heuristic matches.
    """
    # no bytes, no match
    if not header:
        return fallback

    # check the MPEG-4-like formats, the RIFF formats, and AIFF audio
    for matcher in (match_ftyp, match_riff, match_form):
        mime = matcher(header)
        if mime != '':
            return mime

    # maybe it's a bitmap picture, which almost always has 40 on 15th byte:
    # check that byte directly, since searching for the first 0x28 around it
    # fails when one of the bytes just before it happens to be 0x28 too
    if header.startswith(b'BM') and len(header) > 14 and header[14] == 0x28:
        return 'image/x-bmp'

    # check general lookup-table
    for marker, mime in hdr_dispatch.get(header[0], ()):
        if exact_match(header, marker):
            return mime

    # try HTML, SVG, and even generic XML
    if b'<' in header[:8]:
        probe = header[:64]
        folded = probe.lower()
        for marker, mime in xmlish_heuristics:
            # HTML tag names and doctypes are case-insensitive, unlike the
            # SVG/XML ones, which must match exactly
            if mime == 'text/html':
                found = marker.lower() in folded
            else:
                found = marker in probe
            if found:
                return mime

    # try some common cases for JSON
    if any(pattern.match(header) for pattern in json_heuristics):
        return 'application/json'

    # nothing matched
    return fallback


def show_it(conn, start: bytes, rest) -> None:
    """Serve the data given to the connection given, auto-detecting its type.

    Args:
        conn: the connected socket to respond to.
        start: the first chunk of the data, used for the auto-detection.
        rest: binary file-like object with the data following `start`.

    Base64-encoded data-URIs are decoded first, so the payload inside them is
    what's shown; a failure to decode them raises the exception from
    `b64decode`.
    """
    fallback = 'text/plain; charset=UTF-8'

    # handle base64-encoded data-URIs
    if start.startswith(b'data:'):
        i = start.find(b';base64,', 0, 64)
        if i > 0:
            mime_type = str(start[len('data:'):i], encoding='utf-8')
            payload = start[i + len(';base64,'):] + rest.read()
            inp = BytesIO(b64decode(payload))

            if mime_type == '':
                # data-URI declares no type, so guess it from its payload
                start = inp.read(4096)
                show_it_as(conn, start, inp, guess_mime(start, fallback))
            else:
                show_it_as(conn, bytes(), inp, mime_type)
            return

    show_it_as(conn, start, rest, guess_mime(start, fallback))


def _skip_request_headers(conn) -> None:
    """Read and ignore the client's request up to its empty-line terminator.

    The terminator may be split across reads, so the last 3 bytes of what was
    read so far are kept around; reading also stops when the client closes
    the connection, instead of looping forever on empty reads.
    """
    tail = b''
    while True:
        chunk = conn.recv(1024)
        if not chunk:
            return
        tail = tail[-3:] + chunk
        if b'\r\n\r\n' in tail:
            return


def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
    """Respond to the client's request with the data and MIME type given.

    Args:
        conn: the connected socket to respond to.
        start: the first chunk of the data.
        rest: binary file-like object with the data following `start`.
        mime_type: the value to use for the Content-Type response header.
    """
    # read-ignore all client headers
    _skip_request_headers(conn)

    # web-browsers insist on auto-downloads when given wave or flac audio
    if mime_type in ('audio/x-wav', 'audio/x-flac'):
        handle_sound_workaround(conn, mime_type, start, rest)
        return

    # web-browsers insist on auto-downloads when given bitmap pictures
    if mime_type == 'image/x-bmp':
        handle_image_workaround(conn, mime_type, start, rest)
        return

    conn.sendall(b'HTTP/1.1 200 OK\r\n')
    conn.sendall(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
    conn.sendall(b'Content-Disposition: inline\r\nConnection: close\r\n\r\n')
    conn.sendall(start)
    conn.sendfile(rest)


def chunked_write(dest, src) -> None:
    """Copy everything left in `src` into `dest`, 32 KiB at a time."""
    while True:
        chunk = src.read(32 * 1024)
        if not chunk:
            return
        dest.write(chunk)


def _send_base64_element(conn, opening: str, closing: bytes, start: bytes,
                         rest) -> None:
    """Send an HTML element whose attribute value is all the data in base64.

    `opening` must end right where the base64 payload begins, and `closing`
    must end both the attribute and the element.
    """
    conn.sendall(bytes(opening, encoding='utf-8'))
    # base64 needs all bytes at once to encode them as a single payload
    with BytesIO() as data:
        data.write(start)
        chunked_write(data, rest)
        conn.sendall(b64encode(data.getvalue()))
    conn.sendall(closing)


def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
    """Serve audio as a web page with an audio player using a data-URI."""
    def emit_inner_body() -> None:
        opening = f'    <audio controls autofocus src="data:{mime};base64,'
        _send_base64_element(conn, opening, b'"></audio>\n', start, rest)
    handle_workaround(conn, 'Sound', emit_inner_body)


def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
    """Serve a picture as a web page with an image using a data-URI."""
    def emit_inner_body() -> None:
        opening = f'    <img src="data:{mime};base64,'
        _send_base64_element(conn, opening, b'">\n', start, rest)
    handle_workaround(conn, 'Bitmap Picture', emit_inner_body)


# start is the beginning of the web pages emitted by func handle_workaround,
# where `%%` stands for the page title; several funcs have a parameter with
# the same name, but those are about the first chunk of input data instead
start = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <link rel="icon" href="data:,">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>%%</title>
    <style>
        body { margin: auto; }
        audio { display: block; margin: auto; width: 90vw; }
        img { display: block; margin: auto; }
    </style>
</head>
<body>
'''


def handle_workaround(conn, title: str, handle_inner_body: Callable) -> None:
    """Send a whole web page, whose body comes from the callback given.

    Args:
        conn: the connected socket to respond to.
        title: the title of the web page.
        handle_inner_body: func called with no arguments, which is expected
            to send the contents of the page body.
    """
    page_start = bytes(start.replace('%%', title).lstrip('\n'), encoding='utf-8')
    conn.sendall(b'HTTP/1.1 200 OK\r\n')
    conn.sendall(b'Content-Type: text/html; charset=UTF-8\r\n')
    conn.sendall(b'Content-Disposition: inline\r\nConnection: close\r\n\r\n')
    conn.sendall(page_start)
    handle_inner_body()
    conn.sendall(b'</body>\n</html>\n')


def main() -> int:
    """Run the one-off server for the data from stdin; return the exit code."""
    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        return 0

    try:
        # the `with` blocks ensure both sockets are closed, even on failure
        with socket() as sock:
            # opening socket on port 0 randomly picks an available port
            sock.bind(('localhost', 0))
            port = sock.getsockname()[1]
            sock.settimeout(10.0)
            # only handle one thing at a time, since it's a one-off server
            sock.listen(1)

            open_new_tab(f'http://localhost:{port}')

            # handle only a single request-response cycle
            conn, _ = sock.accept()
            with conn:
                show_it(conn, stdin.buffer.read(4096), stdin.buffer)
    except Exception as e:
        print(str(e), file=stderr)
        return 1
    return 0


if __name__ == '__main__':
    exit(main())