#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Callable, Dict, List, Tuple
from webbrowser import open_new_tab


info = '''
si [options...]

Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecting the data-format, and using a random port
among those available.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

Dozens of common data-formats are recognized when piped from stdin, such as
  - HTML (web pages)
  - PDF
  - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
  - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
  - video (MP4, MOV, WEBM, MKV, AVI)
  - JSON
  - generic UTF-8 plain-text
'''

# MIME type used whenever nothing more specific is detected
_FALLBACK_MIME = 'text/plain; charset=UTF-8'

# how many bytes to move at a time when streaming the payload
_CHUNK_SIZE = 64 * 1024


# hdr_dispatch groups format-description-groups by their first byte, thus
# shortening total lookups for some data header: notice how the `ftyp` data
# formats aren't handled here, since these can include any byte in parts of
# their first few bytes
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),
    ],
    0x1a: [(b'\x1a\x45\xdf\xa3', 'video/webm')],  # matches general MKV format
    0x23: [
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],
    0x25: [(b'%PDF', 'application/pdf'), (b'%!PS', 'application/postscript')],
    0x2e: [(b'.snd', 'audio/basic')],
    0x47: [(b'GIF87a', 'image/gif'), (b'GIF89a', 'image/gif')],
    0x49: [
        # some MP3s start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),
        (b'II*\x00', 'image/tiff'),
    ],
    0x4d: [(b'MM\x00*', 'image/tiff'), (b'MThd', 'audio/midi')],
    0x4f: [(b'OggS', 'audio/ogg')],
    0x63: [(b'caff\x00\x01\x00\x00', 'audio/x-caf')],
    0x66: [(b'fLaC', 'audio/x-flac')],
    0x89: [(b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png')],
    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),
        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}

# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats; markers
# are matched as prefixes of the 4-byte brand which follows the ASCII `ftyp`
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'dash', 'audio/aac'),
    # the `isom` brand could arguably be reported as 'audio/aac' instead
    (b'isom', 'video/mp4'),
    (b'MSNV', 'video/mp4'),
    (b'qt ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)

# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML;
# order matters, since the first marker found wins: SVG must come before
# the generic XML declaration, which SVG files often start with
#
# NOTE(review): the marker bytes in this table were stripped (all text in
# angle brackets was lost) in the copy of the file under review; the values
# below are a reconstruction from the surviving MIME types and comments, so
# confirm them against the upstream source
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<!DOCTYPE html', 'text/html'),
    (b'<!doctype html', 'text/html'),
    (b'<html', 'text/html'),
    (b'<head', 'text/html'),
    (b'<body', 'text/html'),
    (b'<svg', 'image/svg+xml'),
    (b'<?xml', 'application/xml'),
)

# json_heuristics helps func guess_mime auto-detect common starts of JSON
#
# NOTE(review): the original patterns were lost along with the stripped text
# mentioned above; these are deliberately conservative stand-ins, so confirm
# them against the upstream source
json_heuristics: Tuple[Pattern, ...] = (
    compile_re(rb'\s*\{\s*"'),
    compile_re(rb'\s*\[\s*\{'),
    compile_re(rb'\s*\[\s*"'),
)

# the 4-byte tags following the 8-byte RIFF/FORM container headers
_riff_kinds: Dict[bytes, str] = {
    b'WEBP': 'image/webp',
    b'WAVE': 'audio/x-wav',
    b'AVI ': 'video/avi',
}
_form_kinds: Dict[bytes, str] = {
    b'AIFF': 'audio/aiff',
    b'AIFC': 'audio/aiff',
}


def exact_match(header: bytes, maybe: bytes) -> bool:
    '''
    Check whether the header starts with every byte of the marker given.
    Headers shorter than the marker never match.
    '''
    return header.startswith(maybe)


def match_riff(header: bytes) -> str:
    '''
    Handle the RIFF-container special cases for func guess_mime: the result
    is the MIME type detected, or an empty string when nothing matches.
    '''
    if len(header) < 12 or not header.startswith(b'RIFF'):
        return ''
    return _riff_kinds.get(header[8:12], '')


def match_form(header: bytes) -> str:
    '''
    Handle the FORM-container (AIFF) special cases for func guess_mime: the
    result is the MIME type detected, or an empty string when nothing matches.
    '''
    if len(header) < 12 or not header.startswith(b'FORM'):
        return ''
    return _form_kinds.get(header[8:12], '')


def match_ftyp(header: bytes) -> str:
    '''
    Handle the MPEG-4-like special cases for func guess_mime: the result is
    the MIME type detected, or an empty string when nothing matches.
    '''
    # first 4 bytes can be anything, next 4 bytes must be ASCII 'ftyp'
    if len(header) < 12 or header[4:8] != b'ftyp':
        return ''

    # next 4 bytes after the ASCII 'ftyp' declare the data-format
    for marker, mime in ftyp_types:
        if header.startswith(marker, 8, 12):
            return mime

    # unrecognized MPEG-4-style data-format
    return ''


def guess_mime(header: bytes, fallback: str) -> str:
    '''
    Try to auto-detect common MIME-types, given the first few input bytes;
    the fallback given is the result when nothing is recognized.
    '''
    # no bytes, no match
    if not header:
        return fallback

    # check the RIFF formats, AIFF audio, and MPEG-4-like formats
    for matcher in (match_riff, match_form, match_ftyp):
        mime = matcher(header)
        if mime:
            return mime

    # maybe it's a bitmap picture, which almost always has 40 on 15th byte:
    # check that byte directly, since searching for the first 0x28 would miss
    # files which also happen to have that value in the bytes right before it
    if header.startswith(b'BM') and header[14:15] == b'\x28':
        return 'image/x-bmp'

    # check general lookup-table
    for marker, mime in hdr_dispatch.get(header[0], ()):
        if exact_match(header, marker):
            return mime

    # try HTML, SVG, and even XML
    if b'<' in header[:8]:
        nearby = header[:64]
        for marker, mime in xmlish_heuristics:
            if marker in nearby:
                return mime

    # try some common cases for JSON
    if any(pattern.match(header) for pattern in json_heuristics):
        return 'application/json'

    # nothing matched
    return fallback


def show_it(conn, start: bytes, rest) -> None:
    '''
    Handle both normal input and base64-encoded data-URIs: `start` has the
    first input bytes already read, while `rest` is a binary file-like value
    with all remaining input.
    '''
    if start.startswith(b'data:'):
        i = start.find(b';base64,', 0, 64)
        if i > 0:
            mime_type = str(start[len(b'data:'):i], encoding='utf-8')
            # never let input bytes inject extra lines among response headers
            if '\r' in mime_type or '\n' in mime_type:
                mime_type = ''

            encoded = start[i + len(b';base64,'):] + rest.read()
            decoded = b64decode(encoded)
            if mime_type == '':
                # data-URI didn't declare its type, so guess from its payload
                mime_type = guess_mime(decoded[:4096], _FALLBACK_MIME)
            show_it_as(conn, bytes(), BytesIO(decoded), mime_type)
            return

    mime_type = guess_mime(start, _FALLBACK_MIME)
    show_it_as(conn, start, rest, mime_type)


def _skip_request_headers(conn) -> None:
    '''
    Read and ignore the client's request headers, stopping at the empty line
    which ends them, or when the client stops sending.
    '''
    tail = b''
    while True:
        chunk = conn.recv(1024)
        if not chunk:
            # client is done sending: waiting for more would loop forever
            return
        # remember the last few bytes across reads, since the end-of-headers
        # marker can be split between consecutive chunks
        tail = (tail + chunk)[-4:]
        if tail == b'\r\n\r\n':
            return


def _send_stream(conn, rest) -> None:
    '''
    Send all remaining bytes from a binary file-like value. A plain read loop
    is used instead of socket.sendfile, which starts from offset 0 and works
    on the raw file descriptor: that ignores both the bytes already consumed
    and any still held in the reader's buffer.
    '''
    while True:
        chunk = rest.read(_CHUNK_SIZE)
        if not chunk:
            return
        conn.sendall(chunk)


def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
    '''
    This is where the web-serving action happens: the request is read and
    ignored, then all input is sent as a single response of the type given.
    '''
    _skip_request_headers(conn)

    # web-browsers insist on auto-downloads when given wave or flac audio
    if mime_type in ('audio/x-wav', 'audio/x-flac'):
        handle_sound_workaround(conn, mime_type, start, rest)
        return

    # web-browsers insist on auto-downloads when given bitmap pictures
    if mime_type == 'image/x-bmp':
        handle_image_workaround(conn, mime_type, start, rest)
        return

    # handle all other data formats
    o = conn.sendall
    o(b'HTTP/1.1 200 OK\r\n')
    o(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the only/last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # send all input bytes
    o(start)
    _send_stream(conn, rest)


def _send_embedded(conn, title: str, opening: str, closing: bytes,
                   start: bytes, rest) -> None:
    '''
    Send a web page whose body is a single element with all input embedded
    in it as a base64-encoded data-URI: `opening` must end right where the
    base64 payload starts, and `closing` finishes the element.
    '''
    payload = b64encode(start + rest.read())

    def emit_inner_body() -> None:
        conn.sendall(bytes(opening, encoding='utf-8'))
        conn.sendall(payload)
        conn.sendall(closing)

    handle_workaround(conn, title, emit_inner_body)


# NOTE(review): the HTML in the next 2 funcs was stripped in the copy of the
# file under review; the markup is a reconstruction based on the surviving
# code (base64 data-URIs, titles, trailing line-feeds), so confirm details
# such as element attributes against the upstream source

def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Show sounds via an audio player in a page, instead of downloading them.'
    opening = f'<audio controls src="data:{mime};base64,'
    _send_embedded(
        conn, 'Wave-Audio Sound', opening, b'"></audio>\n', start, rest,
    )


def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
    'Show pictures via an image in a page, instead of downloading them.'
    opening = f'<img src="data:{mime};base64,'
    _send_embedded(conn, 'Bitmap Picture', opening, b'">\n', start, rest)


def handle_workaround(conn, title: str, handle_inner_body: Callable) -> None:
    '''
    Avoid annoying auto-download web-browser behavior, by sending a web page
    instead: the callback given is called with no arguments, and is expected
    to send whatever goes inside the page's body.
    '''
    o = conn.sendall
    o(b'HTTP/1.1 200 OK\r\n')
    # the payload is always a web page, no matter what it embeds
    o(b'Content-Type: text/html; charset=UTF-8\r\n')
    # discourage web-browsers' download-dialogs and/or auto-downloads
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # emit HTML work-around
    # NOTE(review): tags reconstructed, as the originals were stripped from
    # the copy of the file under review; confirm against the upstream source
    o(b'<!DOCTYPE html>\n')
    o(b'<html lang="en">\n')
    o(b'<head>\n')
    o(b'    <meta charset="UTF-8">\n')
    # an empty data-URI icon avoids a follow-up favicon request, which this
    # one-off server couldn't answer
    o(b'    <link rel="icon" href="data:,">\n')
    o(b'    <meta name="viewport" content="width=device-width">\n')
    o(bytes(f'    <title>{title}</title>\n', encoding='utf-8'))
    o(b'</head>\n')
    o(b'<body>\n')
    handle_inner_body()
    o(b'</body>\n')
    o(b'</html>\n')


def main() -> None:
    'Run the one-off web-server which shows all data from standard input.'
    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    try:
        # both sockets are closed even when things go wrong
        with socket() as sock:
            # opening socket on port 0 randomly picks an available port
            sock.bind(('localhost', 0))
            port = sock.getsockname()[1]
            sock.settimeout(10.0)
            # only handle one thing at a time, since it's a one-off server
            sock.listen(1)

            open_new_tab(f'http://localhost:{port}')

            # handle only a single request-response cycle
            conn, _ = sock.accept()
            with conn:
                show_it(conn, stdin.buffer.read(4096), stdin.buffer)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()