#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Callable, Dict, List, Tuple
from webbrowser import open_new_tab


info = '''
si [options...]


Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecting the data-format, and using a random port
among those available.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

Dozens of common data-formats are recognized when piped from stdin, such as

    - HTML (web pages)
    - PDF
    - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
    - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
    - video (MP4, MOV, WEBM, MKV, AVI)
    - JSON
    - generic UTF-8 plain-text
'''

# fallback_mime is the MIME type used when no format is auto-detected
fallback_mime = 'text/plain; charset=UTF-8'

# chunk_size is how many bytes at a time are relayed from input to client
chunk_size = 64 * 1024


# hdr_dispatch groups format-description-groups by their first byte, thus
# shortening total lookups for some data header: notice how the `ftyp` data
# formats aren't handled here, since these can include any byte in parts of
# their first few bytes
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),
    ],
    0x1a: [(b'\x1a\x45\xdf\xa3', 'video/webm')],  # matches general MKV format
    0x23: [
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],
    0x25: [(b'%PDF', 'application/pdf'), (b'%!PS', 'application/postscript')],
    0x2e: [(b'.snd', 'audio/basic')],
    0x47: [(b'GIF87a', 'image/gif'), (b'GIF89a', 'image/gif')],
    0x49: [
        # some MP3s start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),
        (b'II*\x00', 'image/tiff'),
    ],
    0x4d: [(b'MM\x00*', 'image/tiff'), (b'MThd', 'audio/midi')],
    0x4f: [(b'OggS', 'audio/ogg')],
    0x63: [(b'caff\x00\x01\x00\x00', 'audio/x-caf')],
    0x66: [(b'fLaC', 'audio/x-flac')],
    0x89: [(b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png')],
    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),
        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}


# riff_types helps func match_riff tell RIFF-container formats apart
riff_types: Tuple[Tuple[bytes, str], ...] = (
    (b'WEBP', 'image/webp'),
    (b'WAVE', 'audio/x-wav'),
    (b'AVI ', 'video/avi'),
)

# form_types helps func match_form tell FORM-container formats apart
form_types: Tuple[Tuple[bytes, str], ...] = (
    (b'AIFF', 'audio/aiff'),
    (b'AIFC', 'audio/aiff'),
)

# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'dash', 'audio/aac'),
    (b'isom', 'video/mp4'),
    # (b'isom', 'audio/aac'),
    (b'MSNV', 'video/mp4'),
    (b'qt ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)

# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<html>', 'text/html'),
    (b'<html ', 'text/html'),
    (b'<head>', 'text/html'),
    (b'<body>', 'text/html'),
    (b'<!DOCTYPE html>', 'text/html'),
    (b'<svg>', 'image/svg+xml'),
    (b'<svg ', 'image/svg+xml'),
    (b'<?xml', 'application/xml'),
)

# json_heuristics helps func guess_mime auto-detect JSON via regexes:
# it's not perfect, but it seems effective-enough in practice
json_heuristics: Tuple[Pattern, ...] = (
    compile_re(rb'^\s*\{\s*"'),
    compile_re(rb'^\s*\{\s*\['),
    compile_re(rb'^\s*\[\s*"'),
    compile_re(rb'^\s*\[\s*\{'),
    compile_re(rb'^\s*\[\s*\['),
)


def exact_match(header: bytes, maybe: bytes) -> bool:
    '''
    Check whether the header starts with all the marker bytes given: this
    is always false when the header is shorter than the marker.
    '''
    return header.startswith(maybe)


def _match_container(header: bytes, magic: bytes, kinds) -> str:
    '''
    Handle containers which start with a 4-byte magic value, followed by 4
    bytes which can be anything, followed by a format marker at offset 8:
    the result is an empty string when nothing matches.
    '''

    if len(header) < 12 or not header.startswith(magic):
        return ''

    for marker, mime in kinds:
        if header.startswith(marker, 8, 12):
            return mime
    return ''


def match_riff(header: bytes) -> str:
    'Handle a few special cases for func guess_mime: RIFF containers.'
    return _match_container(header, b'RIFF', riff_types)


def match_form(header: bytes) -> str:
    'Handle a few special cases for func guess_mime: FORM/AIFF containers.'
    return _match_container(header, b'FORM', form_types)


def match_ftyp(header: bytes) -> str:
    'Handle a few special cases for func guess_mime: MPEG-4-like formats.'

    # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
    if len(header) < 12 or not header.startswith(b'ftyp', 4, 8):
        return ''

    # next bytes after the ASCII 'ftyp' declare the data-format: markers are
    # matched as prefixes starting at offset 8, and must end by offset 12
    for marker, mime in ftyp_types:
        if header.startswith(marker, 8, 12):
            return mime

    # unrecognized MPEG-4-style data-format
    return ''


def guess_mime(header: bytes, fallback: str) -> str:
    '''
    Try to auto-detect common MIME-types, given the first few input bytes:
    the fallback given is the result when nothing is recognized, or when
    there are no bytes to check.
    '''

    # no bytes, no match
    if not header:
        return fallback

    # check the RIFF formats, AIFF audio, and MPEG-4-like formats
    for matcher in (match_riff, match_form, match_ftyp):
        mime = matcher(header)
        if mime != '':
            return mime

    # maybe it's a bitmap picture, which almost always has 40 on 15th byte:
    # only that byte is checked, since earlier bytes (part of the file
    # header) can also happen to have the same value
    if header.startswith(b'BM') and header[14:15] == b'\x28':
        return 'image/x-bmp'

    # check general lookup-table
    for marker, mime in hdr_dispatch.get(header[0], ()):
        if exact_match(header, marker):
            return mime

    # try HTML, SVG, and even XML
    if header.find(b'<', 0, 8) >= 0:
        for marker, mime in xmlish_heuristics:
            if header.find(marker, 0, 64) >= 0:
                return mime

    # try some common cases for JSON
    if any(pattern.match(header) for pattern in json_heuristics):
        return 'application/json'

    # nothing matched
    return fallback


def show_it(conn, start: bytes, rest) -> None:
    '''
    Handle both normal input and data-URIs: `start` has the first input
    bytes, already read, while `rest` is a binary reader for all bytes
    after those.
    '''

    # handle base64-encoded data-URIs
    if start.startswith(b'data:'):
        i = start.find(b';base64,', 0, 64)
        if i > 0:
            mime_type = str(start[len('data:'):i], encoding='utf-8')
            # decoding needs the whole payload at once
            encoded = b''.join((start[i + len(';base64,'):], rest.read()))
            inp = BytesIO(b64decode(encoded))

            if mime_type == '':
                # no MIME type declared: guess it from the decoded bytes
                start = inp.read(4096)
                mime_type = guess_mime(start, fallback_mime)
                show_it_as(conn, start, inp, mime_type)
            else:
                show_it_as(conn, bytes(), inp, mime_type)
            return

    mime_type = guess_mime(start, fallback_mime)
    return show_it_as(conn, start, rest, mime_type)


def _skip_request_headers(conn) -> None:
    '''
    Read-ignore all client headers, which end with an empty line: this
    also stops if the client closes its side of the connection, instead
    of waiting forever for bytes which will never come.
    '''

    end = b'\r\n\r\n'
    tail = b''
    while True:
        chunk = conn.recv(1024)
        if not chunk:
            # client hung up before finishing its request
            return
        # keep a few trailing bytes around, in case the end-of-headers
        # marker is split across different reads
        if end in tail + chunk:
            return
        tail = (tail + chunk)[-(len(end) - 1):]


def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
    '''
    This is where the web-serving action happens: the client's request is
    read and ignored, then a response with the MIME type given is sent,
    using `start` followed by all bytes from reader `rest` as the payload.
    '''

    _skip_request_headers(conn)

    # web-browsers insist on auto-downloads when given wave or flac audio
    if mime_type in ('audio/x-wav', 'audio/x-flac'):
        handle_sound_workaround(conn, mime_type, start, rest)
        return

    # web-browsers insist on auto-downloads when given bitmap pictures
    if mime_type == 'image/x-bmp':
        handle_image_workaround(conn, mime_type, start, rest)
        return

    # handle all other data formats
    o = conn.sendall

    o(b'HTTP/1.1 200 OK\r\n')
    o(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the only/last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # send all input bytes: socket.sendfile isn't used, since it's meant for
    # regular files read from an explicit offset, while the input here can
    # be a pipe, or a reader whose first bytes were already taken
    o(start)
    for chunk in iter(lambda: rest.read(chunk_size), b''):
        o(chunk)


def _emit_embedded(conn, title: str, opening: str, closing: bytes,
                   start: bytes, rest) -> None:
    '''
    Send a web page whose body is a single HTML tag embedding all input
    bytes as a base64-encoded data-URI: `opening` is the tag up to where
    the base64 payload starts, and `closing` is what comes right after.
    '''

    def emit_inner_body() -> None:
        conn.sendall(bytes(opening, encoding='utf-8'))
        conn.sendall(b64encode(b''.join((start, rest.read()))))
        conn.sendall(closing)

    handle_workaround(conn, title, emit_inner_body)


def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Show sounds using a web page with an audio player in it.'
    pre = f' <audio controls autofocus src="data:{mime};base64,'
    post = b'"></audio>\n'
    _emit_embedded(conn, 'Wave-Audio Sound', pre, post, start, rest)


def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Show pictures using a web page with an image tag in it.'
    pre = f' <img src="data:{mime};base64,'
    post = b'">\n'
    _emit_embedded(conn, 'Bitmap Picture', pre, post, start, rest)


def handle_workaround(conn, title: str, handle_inner_body: Callable) -> None:
    '''
    Avoid annoying auto-download web-browser behavior, by sending a whole
    web page instead: the callback given is called with no arguments, and
    is expected to send the inner part of the page body.
    '''

    o = conn.sendall

    o(b'HTTP/1.1 200 OK\r\n')
    # the response is always a web page, no matter the input type
    o(b'Content-Type: text/html; charset=UTF-8\r\n')
    # discourage web-browsers' download-dialogs and/or auto-downloads
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # emit HTML work-around
    o(b'<!DOCTYPE html>\n')
    o(b'<html lang="en">\n')
    o(b'<head>\n')
    o(b' <meta charset="UTF-8">\n')
    # an empty data-URI icon avoids a follow-up favicon request
    o(b' <link rel="icon" href="data:,">\n')
    o(b' <meta name="viewport"')
    o(b' content="width=device-width, initial-scale=1.0">\n')
    o(bytes(f' <title>{title}</title>\n', encoding='utf-8'))
    o(b' <style>\n')
    o(b' body { margin: auto; }\n')
    o(b' audio { display: block; margin: auto; width: 90vw; }\n')
    o(b' img { display: block; margin: auto; }\n')
    o(b' </style>\n')
    o(b'</head>\n')
    o(b'<body>\n')
    handle_inner_body()
    o(b'</body>\n')
    o(b'</html>\n')


def main() -> None:
    '''
    Run the app: show the help message if asked to, or serve standard
    input to a new web-browser tab via a one-off localhost server.
    '''

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    try:
        # the `with` statements ensure sockets are closed even on errors
        with socket() as sock:
            # opening socket on port 0 randomly picks an available port
            sock.bind(('localhost', 0))
            port = sock.getsockname()[1]
            sock.settimeout(10.0)
            # only handle one thing at a time, since it's a one-off server
            sock.listen(1)

            open_new_tab(f'http://localhost:{port}')

            # handle only a single request-response cycle
            conn, _ = sock.accept()
            with conn:
                show_it(conn, stdin.buffer.read(4096), stdin.buffer)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()