#!/usr/bin/python3
# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Callable, Dict, List, Tuple
from webbrowser import open_new_tab
# info is the help message, which is shown (after stripping its surrounding
# whitespace) when the script is run with one of the standard help options
info = '''
si [options...]
Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecting the data-format, and using a random port
among those available.
The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.
Dozens of common data-formats are recognized when piped from stdin, such as
- HTML (web pages)
- PDF
- pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
- audio (AAC, MP3, FLAC, WAV, AU, MIDI)
- video (MP4, MOV, WEBM, MKV, AVI)
- JSON
- generic UTF-8 plain-text
'''
# when the only cmd-line argument is one of the usual help options, show the
# help message on stderr and quit successfully, before doing anything else
help_requested = (
    len(argv) == 2
    and argv[1] in ('-h', '--h', '-help', '--help')
)
if help_requested:
    print(info.strip(), file=stderr)
    exit(0)
# hdr_dispatch maps a leading byte to the (signature, MIME type) pairs whose
# signature starts with that byte, which shortens the lookups needed for any
# data header; the `ftyp` data formats aren't handled here, since these can
# include any byte in parts of their first few bytes
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {}
for _signature, _mime in (
    (b'\x00\x00\x01\xba', 'video/mpeg'),
    (b'\x00\x00\x01\xb3', 'video/mpeg'),
    (b'\x00\x00\x01\x00', 'image/x-icon'),
    (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),
    # this signature matches the general MKV format
    (b'\x1a\x45\xdf\xa3', 'video/webm'),
    (b'#! ', 'text/plain; charset=UTF-8'),
    (b'#!/', 'text/plain; charset=UTF-8'),
    (b'%PDF', 'application/pdf'),
    (b'%!PS', 'application/postscript'),
    (b'.snd', 'audio/basic'),
    (b'GIF87a', 'image/gif'),
    (b'GIF89a', 'image/gif'),
    # some MP3s start with an ID3 meta-data section
    (b'ID3\x02', 'audio/mpeg'),
    (b'ID3\x03', 'audio/mpeg'),
    (b'ID3\x04', 'audio/mpeg'),
    (b'II*\x00', 'image/tiff'),
    (b'MM\x00*', 'image/tiff'),
    (b'MThd', 'audio/midi'),
    (b'OggS', 'audio/ogg'),
    (b'caff\x00\x01\x00\x00', 'audio/x-caf'),
    (b'fLaC', 'audio/x-flac'),
    (b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png'),
    (b'\xff\xd8\xff', 'image/jpeg'),
    # handle common ways MP3 data start
    (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
    (b'\xff\xfb', 'audio/mpeg'),
):
    # file each pair under its signature's first byte, keeping the table order
    hdr_dispatch.setdefault(_signature[0], []).append((_signature, _mime))
# don't leave the loop variables around as module-level names
del _signature, _mime
# ftyp_types pairs each 4-byte `ftyp` brand with the MIME type to use for it,
# and helps func match_ftyp auto-detect MPEG-4-like formats
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'dash', 'audio/aac'),
    # `isom` was also considered as audio/aac, but is reported as video
    (b'isom', 'video/mp4'),
    (b'MSNV', 'video/mp4'),
    # brands are always 4 bytes: the QuickTime one is padded with 2 spaces
    (b'qt  ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)
# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML
xmlish_heuristics: Tuple[Tuple[bytes, str]] = (
(b'', 'text/html'),
(b'', 'text/html'),
(b'
', 'text/html'),
(b'', 'text/html'),
(b'