#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# si [options...]
#
# Show It shows data read from standard-input, using your default web browser
# by auto-opening tabs, auto-detecting the data-format, and using a random port
# among those available.
#
# The localhost connection is available only until all data are transferred:
# this means refreshing your browser tab will lose your content, replacing it
# with a server-not-found message page.
#
# Dozens of common data-formats are recognized when piped from stdin, such as
#
# - HTML (web pages)
# - PDF
# - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
# - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
# - video (MP4, MOV, WEBM, MKV, AVI)
# - JSON
# - generic UTF-8 plain-text


from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Dict, List, Tuple
from webbrowser import open_new_tab


# info is the help message shown when asked to
info = '''
si [options...]

Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecting the data-format, and using a random port
among those available.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

Dozens of common data-formats are recognized when piped from stdin, such as

- HTML (web pages)
- PDF
- pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
- audio (AAC, MP3, FLAC, WAV, AU, MIDI)
- video (MP4, MOV, WEBM, MKV, AVI)
- JSON
- generic UTF-8 plain-text
'''.strip()


# hdr_dispatch groups format-description-groups by their first byte, thus
# shortening total lookups for some data header: notice how the `ftyp` data
# formats aren't handled here, since these can include any byte in parts of
# their first few bytes
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),
    ],

    0x1a: [
        # handle webm and MKV, which is practically like webm
        (b'\x1a\x45\xdf\xa3', 'video/webm'),
    ],

    0x23: [
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],

    0x25: [
        (b'%PDF', 'application/pdf'),
        (b'%!PS', 'application/postscript'),
    ],

    0x2e: [
        (b'.snd', 'audio/basic'),
    ],

    0x47: [
        (b'GIF87a', 'image/gif'),
        (b'GIF89a', 'image/gif'),
    ],

    0x49: [
        # handle MP3 which start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),

        (b'II*\x00', 'image/tiff'),
    ],

    0x4d: [
        (b'MM\x00*', 'image/tiff'),
        (b'MThd', 'audio/midi'),
    ],

    0x4f: [
        # the opus audio-format is usually inside an ogg-format container
        (b'OggS', 'audio/ogg'),
    ],

    0x63: [
        (b'caff\x00\x01\x00\x00', 'audio/x-caf'),
    ],

    0x66: [
        (b'fLaC', 'audio/x-flac'),
    ],

    0x89: [
        (b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png'),
    ],

    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),

        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}


# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'dash', 'audio/aac'),
    (b'isom', 'video/mp4'),
    # (b'isom', 'audio/aac'),
    (b'MSNV', 'video/mp4'),
    (b'qt ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)

# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<html>', 'text/html'),
    (b'<html ', 'text/html'),
    (b'<head>', 'text/html'),
    (b'<body>', 'text/html'),
    (b'<!DOCTYPE html>', 'text/html'),
    (b'<svg>', 'image/svg+xml'),
    (b'<svg ', 'image/svg+xml'),
    (b'<?xml', 'application/xml'),
)

# json_heuristics helps func guess_mime auto-detect JSON via regexes:
# it's not perfect, but it seems effective-enough in practice
json_heuristics: Tuple[Pattern, ...] = (
    compile_re(rb'^\s*\{\s*"'),
    compile_re(rb'^\s*\{\s*\['),
    compile_re(rb'^\s*\[\s*"'),
    compile_re(rb'^\s*\[\s*\{'),
    compile_re(rb'^\s*\[\s*\['),
)


def exact_match(header: bytes, maybe: bytes) -> bool:
    '''
    Check if the header given starts with all the bytes of the pattern
    given: headers shorter than the pattern never match.
    '''

    return header.startswith(maybe)


def match_riff(header: bytes) -> str:
    '''
    Handle the RIFF-container special cases for func guess_mime: the result
    is either a MIME type, or an empty string when nothing matched.
    '''

    if len(header) < 12 or not header.startswith(b'RIFF'):
        return ''

    # bytes 4..7 hold a size: the format name comes right after it
    if header.startswith(b'WEBP', 8):
        return 'image/webp'
    if header.startswith(b'WAVE', 8):
        return 'audio/x-wav'
    if header.startswith(b'AVI ', 8):
        return 'video/avi'
    return ''


def match_form(header: bytes) -> str:
    '''
    Handle the FORM-container special cases for func guess_mime: the result
    is either a MIME type, or an empty string when nothing matched.
    '''

    if len(header) < 12 or not header.startswith(b'FORM'):
        return ''

    if header.startswith((b'AIFF', b'AIFC'), 8):
        return 'audio/aiff'
    return ''


def match_ftyp(header: bytes) -> str:
    '''
    Handle the MPEG-4-like special cases for func guess_mime: the result
    is either a MIME type, or an empty string when nothing matched.
    '''

    # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
    if len(header) < 12 or not header.startswith(b'ftyp', 4):
        return ''

    # next 4 bytes after the ASCII 'ftyp' declare the data-format
    for marker, mime in ftyp_types:
        if header.startswith(marker, 8):
            return mime

    # unrecognized MPEG-4-style data-format
    return ''


def guess_mime(header: bytes, fallback: str) -> str:
    '''
    Try to auto-detect MIME-types for common file-types, given the first
    few bytes read from them: the fallback given is the result when the
    header is empty, or when nothing matches it.
    '''

    # no bytes, no match
    if len(header) == 0:
        return fallback

    # maybe it's one of the RIFF formats, AIFF audio, or MPEG-4-like data
    for matcher in (match_riff, match_form, match_ftyp):
        m = matcher(header)
        if m != '':
            return m

    # maybe it's a bitmap picture, which almost always has 40 on 15th byte:
    # check that byte directly, so other bytes valued 40 which come before
    # it can't prevent a match
    if header.startswith(b'BM') and header[14:15] == b'\x28':
        return 'image/x-bmp'

    # check general lookup-table
    for maybe, mime in hdr_dispatch.get(header[0], ()):
        if exact_match(header, maybe):
            return mime

    # try HTML, SVG, and even XML
    if header.find(b'<', 0, 8) >= 0:
        for marker, mime in xmlish_heuristics:
            if header.find(marker, 0, 64) >= 0:
                return mime

    # try some common cases for JSON
    for pattern in json_heuristics:
        if pattern.match(header):
            return 'application/json'

    # nothing matched
    return fallback


def show_it(conn, start: bytes, rest) -> None:
    '''
    Handle both normal input and data-URIs: `conn` is the connected socket,
    `start` has the first bytes already read from the input, and `rest` is
    the binary reader for all bytes after those.
    '''

    # handle base64-encoded data-URIs
    if start.startswith(b'data:'):
        i = start.find(b';base64,', 0, 64)
        if i > 0:
            mime_type = str(start[len('data:'):i], encoding='utf-8')
            encoded = start[i + len(';base64,'):] + rest.read()
            inp = BytesIO(b64decode(encoded))

            if mime_type == '':
                # data-URI didn't declare its type, so guess it from the
                # decoded payload
                start = inp.read(4096)
                mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
                show_it_as(conn, start, inp, mime_type)
            else:
                show_it_as(conn, bytes(), inp, mime_type)
            return

    mime_type = guess_mime(start, 'text/plain; charset=UTF-8')
    return show_it_as(conn, start, rest, mime_type)


def skip_request_headers(conn) -> None:
    '''
    Read and ignore all client headers, stopping right after the empty line
    which ends them, or as soon as the client closes the connection.
    '''

    tail = b''
    while True:
        chunk = conn.recv(1024)
        if not chunk:
            # client closed the connection: nothing else will ever come
            return
        # the empty line can be split across chunks, so also check the
        # last few bytes from the previous chunk
        if b'\r\n\r\n' in tail + chunk:
            return
        tail = (tail + chunk)[-3:]


def send_rest(conn, rest, chunk_size: int = 64 * 1024) -> None:
    '''
    Send all bytes left in the reader given, a chunk at a time: unlike
    socket.sendfile, which is documented for regular files, this always
    resumes from the reader's current position, and works the same way for
    pipes, files, and in-memory buffers.
    '''

    for chunk in iter(lambda: rest.read(chunk_size), b''):
        conn.sendall(chunk)


def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
    '''
    This is where the web-serving action happens: the payload sent is the
    bytes in `start`, followed by all bytes left in reader `rest`, and is
    announced using the MIME type given.
    '''

    skip_request_headers(conn)

    # web-browsers insist on auto-downloads when given wave or flac audio
    if mime_type in ('audio/x-wav', 'audio/x-flac'):
        handle_sound_workaround(conn, mime_type, start, rest)
        return

    # web-browsers insist on auto-downloads when given bitmap pictures
    if mime_type == 'image/x-bmp':
        handle_image_workaround(conn, mime_type, start, rest)
        return

    # handle all other data formats

    conn.sendall(b'HTTP/1.1 200 OK\r\n')
    conn.sendall(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
    conn.sendall(b'Content-Disposition: inline\r\n')
    # tell browser this is the only/last request
    conn.sendall(b'Connection: close\r\n')
    # payload starts right after an empty line
    conn.sendall(b'\r\n')

    # send all input bytes
    conn.sendall(start)
    send_rest(conn, rest)


def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
    '''
    Show audio as a web page with an audio player in it, whose source is
    all the input data, embedded as a base64-encoded data-URI.
    '''

    pre = f' <audio controls autofocus src="data:{mime};base64,'

    def emit_inner_body() -> None:
        conn.sendall(bytes(pre, encoding='utf-8'))
        conn.sendall(b64encode(start + rest.read()))
        conn.sendall(b'"></audio>\n')

    handle_workaround(conn, 'Wave-Audio Sound', emit_inner_body)


def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
    '''
    Show a picture as a web page with an image in it, whose source is
    all the input data, embedded as a base64-encoded data-URI.
    '''

    pre = f' <img src="data:{mime};base64,'

    def emit_inner_body() -> None:
        conn.sendall(bytes(pre, encoding='utf-8'))
        conn.sendall(b64encode(start + rest.read()))
        conn.sendall(b'">\n')

    handle_workaround(conn, 'Bitmap Picture', emit_inner_body)


def handle_workaround(conn, title: str, handle_inner_body) -> None:
    '''
    Avoid annoying web-browser behavior when given wave-audio data
    and/or bitmap pictures: this emits a whole web page, calling the
    func given (with no arguments) to emit the inside of its body.
    '''

    conn.sendall(b'HTTP/1.1 200 OK\r\n')
    # the work-around is always a web page, whatever the input data are
    conn.sendall(b'Content-Type: text/html; charset=UTF-8\r\n')
    # discourage web-browsers' download-dialogs and/or auto-downloads
    conn.sendall(b'Content-Disposition: inline\r\n')
    # tell browser this is the last request
    conn.sendall(b'Connection: close\r\n')
    # payload starts right after an empty line
    conn.sendall(b'\r\n')

    # emit HTML work-around
    conn.sendall(b'<!DOCTYPE html>\n')
    conn.sendall(b'<html lang="en">\n')
    conn.sendall(b'<head>\n')
    conn.sendall(b' <meta charset="UTF-8">\n')
    conn.sendall(b' <link rel="icon" href="data:,">\n')
    conn.sendall(b' <meta name="viewport"')
    conn.sendall(b' content="width=device-width, initial-scale=1.0">\n')
    conn.sendall(bytes(f' <title>{title}</title>\n', encoding='utf-8'))
    conn.sendall(b' <style>\n')
    conn.sendall(b' body { margin: auto; }\n')
    conn.sendall(b' ')
    conn.sendall(b'audio { display: block; margin: auto; width: 90vw; }\n')
    conn.sendall(b' img { display: block; margin: auto; }\n')
    conn.sendall(b' </style>\n')
    conn.sendall(b'</head>\n')
    conn.sendall(b'<body>\n')
    handle_inner_body()
    conn.sendall(b'</body>\n')
    conn.sendall(b'</html>\n')


def check_mime_table(table: Dict[int, List[Tuple[bytes, str]]]) -> None:
    '''
    Check table used for data-format auto-detection, raising a ValueError
    on the first entry which is empty, or whose first byte differs from
    the key it's filed under.
    '''

    for first, entries in table.items():
        for i, (pattern, _) in enumerate(entries):
            key = hex(first)
            if len(pattern) < 1:
                msg = f'{key}: empty byte-pattern in entry (index {i})'
                raise ValueError(msg)
            if pattern[0] != first:
                got = hex(pattern[0])
                msg = f'{key}: wrong first byte ({got}) in entry (index {i})'
                raise ValueError(msg)


def main() -> None:
    '''
    Run the one-off web-server: show the help message if asked to, or
    serve all data from standard-input to a newly-opened browser tab.
    '''

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info, file=stderr)
        exit(0)

    try:
        check_mime_table(hdr_dispatch)

        # the `with` blocks close both sockets, even when things fail
        with socket() as sock:
            # opening socket on port 0 randomly picks an available port
            sock.bind(('localhost', 0))
            port = sock.getsockname()[1]
            sock.settimeout(10.0)
            # only handle one thing at a time, since it's a one-off server
            sock.listen(1)

            open_new_tab(f'http://localhost:{port}')

            # handle only a single request-response cycle
            conn, _addr = sock.accept()
            with conn:
                show_it(conn, stdin.buffer.read(4096), stdin.buffer)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()