#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from base64 import b64decode, b64encode
from io import BytesIO
from re import compile as compile_re, Pattern
from socket import socket
from sys import argv, exit, stderr, stdin
from typing import Callable, Dict, List, Tuple
from webbrowser import open_new_tab


# info is the help message shown when any of the help options is given
info = '''
si [options...]


Show It shows data read from standard-input, using your default web browser
by auto-opening tabs, auto-detecting the data-format, and using a random port
among those available.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

Dozens of common data-formats are recognized when piped from stdin, such as

  - HTML (web pages)
  - PDF
  - pictures (PNG, JPEG, SVG, WEBP, HEIC, AVIF, GIF, BMP)
  - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
  - video (MP4, MOV, WEBM, MKV, AVI)
  - JSON
  - generic UTF-8 plain-text
'''

# handle standard help cmd-line options, quitting right away in that case:
# the help option must be the only cmd-line argument for this to happen
if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info.strip())
    exit(0)


# hdr_dispatch groups known header signatures by their first byte, so func
# guess_mime only has to try the few signatures which share the first byte
# of the data: each entry pairs the exact leading bytes to match with the
# MIME type to report, and entries sharing a first byte are tried in the
# order given; the `ftyp` data formats aren't handled here, since their
# first few bytes can be anything
hdr_dispatch: Dict[int, List[Tuple[bytes, str]]] = {
    0x00: [
        (b'\x00\x00\x01\xba', 'video/mpeg'),
        (b'\x00\x00\x01\xb3', 'video/mpeg'),
        (b'\x00\x00\x01\x00', 'image/x-icon'),
        (b'\x00\x00\x02\x00', 'image/vnd.microsoft.icon'),
    ],
    0x1a: [(b'\x1a\x45\xdf\xa3', 'video/webm')], # matches general MKV format
    0x23: [
        # script shebang lines are shown as plain-text
        (b'#! ', 'text/plain; charset=UTF-8'),
        (b'#!/', 'text/plain; charset=UTF-8'),
    ],
    0x25: [(b'%PDF', 'application/pdf'), (b'%!PS', 'application/postscript')],
    0x2e: [(b'.snd', 'audio/basic')],
    0x47: [(b'GIF87a', 'image/gif'), (b'GIF89a', 'image/gif')],
    0x49: [
        # some MP3s start with an ID3 meta-data section
        (b'ID3\x02', 'audio/mpeg'),
        (b'ID3\x03', 'audio/mpeg'),
        (b'ID3\x04', 'audio/mpeg'),
        (b'II*\x00', 'image/tiff'),
    ],
    0x4d: [(b'MM\x00*', 'image/tiff'), (b'MThd', 'audio/midi')],
    0x4f: [(b'OggS', 'audio/ogg')],
    0x63: [(b'caff\x00\x01\x00\x00', 'audio/x-caf')],
    0x66: [(b'fLaC', 'audio/x-flac')],
    0x89: [(b'\x89PNG\x0d\x0a\x1a\x0a', 'image/png')],
    0xff: [
        (b'\xff\xd8\xff', 'image/jpeg'),
        # handle common ways MP3 data start
        (b'\xff\xf3\x48\xc4\x00', 'audio/mpeg'),
        (b'\xff\xfb', 'audio/mpeg'),
    ],
}


# ftyp_types helps func match_ftyp auto-detect MPEG-4-like formats: each
# entry pairs the 4-byte tag expected right after the ASCII `ftyp` marker
# with the MIME type to report, and entries are tried in the order given
ftyp_types: Tuple[Tuple[bytes, str], ...] = (
    (b'M4A ', 'audio/aac'),
    (b'M4A\x00', 'audio/aac'),
    (b'dash', 'audio/aac'),
    # `isom` could also be mapped to 'audio/aac', but only one MIME type
    # can be reported for it: 'video/mp4' is the one in effect
    (b'isom', 'video/mp4'),
    (b'MSNV', 'video/mp4'),
    (b'qt  ', 'video/quicktime'),
    (b'heic', 'image/heic'),
    (b'avif', 'image/avif'),
)

# xmlish_heuristics helps func guess_mime auto-detect HTML, SVG, and XML:
# each entry pairs a marker to look for near the start of the data with the
# MIME type to report; entries are tried in the order given, so the generic
# XML declaration comes last, letting the more specific HTML and SVG markers
# win when several of them are present
xmlish_heuristics: Tuple[Tuple[bytes, str], ...] = (
    (b'<html>', 'text/html'),
    (b'<html ', 'text/html'),
    (b'<head>', 'text/html'),
    (b'<body>', 'text/html'),
    (b'<!DOCTYPE html', 'text/html'),
    (b'<svg>', 'image/svg+xml'),
    (b'<svg ', 'image/svg+xml'),
    (b'<?xml', 'application/xml'),
)

# json_heuristics helps func guess_mime auto-detect JSON via regexes, which
# only look at how the data start, ignoring any leading whitespace: it's
# not perfect, but it seems effective-enough in practice
json_heuristics: Tuple[Pattern, ...] = (
    compile_re(rb'^\s*\{\s*"'),
    compile_re(rb'^\s*\{\s*\['),
    compile_re(rb'^\s*\[\s*"'),
    compile_re(rb'^\s*\[\s*\{'),
    compile_re(rb'^\s*\[\s*\['),
)


def exact_match(header: bytes, maybe: bytes) -> bool:
    '''
    Check whether the header given starts with all the bytes in `maybe`.

    A header shorter than the candidate prefix never matches, since there
    aren't enough bytes to tell if the input data match.
    '''

    # startswith already fails when the header is shorter than the prefix
    return header.startswith(maybe)


def match_riff(header: bytes) -> str:
    '''
    Tell apart the RIFF-container formats for func guess_mime.

    The result is the MIME type for WEBP pictures, WAVE audio, or AVI video:
    it's an empty string when the header has fewer than 12 bytes, doesn't
    start with `RIFF`, or has an unrecognized format tag.
    '''

    if len(header) < 12 or header[:4] != b'RIFF':
        return ''

    # the 4-byte format tag is expected exactly at byte offsets 8 to 11
    known_tags = (
        (b'WEBP', 'image/webp'),
        (b'WAVE', 'audio/x-wav'),
        (b'AVI ', 'video/avi'),
    )
    for tag, mime in known_tags:
        if header.startswith(tag, 8):
            return mime
    return ''


def match_form(header: bytes) -> str:
    '''
    Detect AIFF audio, including its AIFC variant, for func guess_mime.

    The result is an empty string when the header has fewer than 12 bytes,
    doesn't start with `FORM`, or has an unrecognized format tag.
    '''

    long_enough = len(header) >= 12
    if not (long_enough and header[:4] == b'FORM'):
        return ''

    # the 4-byte format tag is expected exactly at byte offsets 8 to 11
    if header.startswith((b'AIFF', b'AIFC'), 8):
        return 'audio/aiff'
    return ''


def match_ftyp(header: bytes) -> str:
    '''
    Detect MPEG-4-like formats for func guess_mime, using the tags in
    ftyp_types.

    The result is an empty string when the header has fewer than 12 bytes,
    lacks the `ftyp` marker, or declares an unrecognized data-format.
    '''

    # first 4 bytes can be anything, while the next 4 must be ASCII 'ftyp'
    if len(header) < 12 or not header.startswith(b'ftyp', 4, 8):
        return ''

    # the 4 bytes right after the ASCII 'ftyp' declare the data-format: an
    # empty result means an unrecognized MPEG-4-style data-format
    candidates = (
        mime for marker, mime in ftyp_types
        if header.startswith(marker, 8, 12)
    )
    return next(candidates, '')


def guess_mime(header: bytes, fallback: str) -> str:
    '''
    Try to auto-detect common MIME-types, given the first few input bytes.

    Checks run in a fixed order, and the first match wins: container formats
    (RIFF, FORM, `ftyp`), bitmap pictures, the first-byte lookup table, then
    the HTML/SVG/XML and JSON text heuristics. The fallback given is the
    result both for empty headers, and when nothing matches.
    '''

    # no bytes, no match
    if len(header) == 0:
        return fallback

    # check the RIFF formats, AIFF audio, and MPEG-4-like formats
    for matcher in (match_riff, match_form, match_ftyp):
        mime = matcher(header)
        if mime != '':
            return mime

    # maybe it's a bitmap picture, which almost always has 40 on 15th byte:
    # that byte is checked directly, since searching for it from the 9th
    # byte would stop at any earlier byte which happens to be 40 as well
    if header.startswith(b'BM') and header[14:15] == b'\x28':
        return 'image/x-bmp'

    # check general lookup-table, which is keyed by the first header byte
    for magic, mime in hdr_dispatch.get(header[0], ()):
        if exact_match(header, magic):
            return mime

    # an HTML doctype anywhere in the first 64 bytes is enough
    if header.find(b'<!DOCTYPE html', 0, 64) >= 0:
        return 'text/html'

    # try HTML, SVG, and even XML, but only when a `<` shows up early
    if header.find(b'<', 0, 8) >= 0:
        for marker, mime in xmlish_heuristics:
            if header.find(marker, 0, 64) >= 0:
                return mime

    # try some common cases for JSON
    if any(pattern.match(header) for pattern in json_heuristics):
        return 'application/json'

    # nothing matched
    return fallback


def show_it(conn, start: bytes, rest) -> None:
    '''
    Show the input given, handling both normal data and base64 data-URIs.

    The first few input bytes are in `start`, while `rest` is the reader
    for all bytes after those. Base64-encoded data-URIs are decoded, and
    shown using the MIME type they declare: when that's missing, the type
    is auto-detected from the decoded bytes, same as for normal input.
    '''

    text_fallback = 'text/plain; charset=UTF-8'
    prefix = b'data:'
    marker = b';base64,'

    # the base64 marker must show up within the first 64 bytes
    cut = start.find(marker, 0, 64) if start.startswith(prefix) else -1

    # handle normal input
    if cut <= 0:
        show_it_as(conn, start, rest, guess_mime(start, text_fallback))
        return

    # handle base64-encoded data-URIs, which must be fully loaded to decode
    declared = str(start[len(prefix):cut], encoding='utf-8')
    payload = start[cut + len(marker):] + rest.read()
    inp = BytesIO(b64decode(payload))

    if declared != '':
        show_it_as(conn, bytes(), inp, declared)
        return

    # no MIME type was declared, so guess it from the decoded bytes
    head = inp.read(4096)
    show_it_as(conn, head, inp, guess_mime(head, text_fallback))


def show_it_as(conn, start: bytes, rest, mime_type: str) -> None:
    '''
    This is where the web-serving action happens.

    The client's request is read and ignored, then a single HTTP response
    is sent, whose payload is the bytes in `start`, followed by all bytes
    read from `rest`, which can be any binary reader with a `read` method.
    Wave/flac audio and bitmap pictures are wrapped into web pages instead,
    via funcs handle_sound_workaround and handle_image_workaround.
    '''

    # read-ignore all client headers, which end with an empty line: the
    # last 3 bytes seen are kept around, since the end-of-headers marker
    # can be split across reads; an empty read means the client hung up,
    # so stop waiting in that case as well, instead of looping forever
    tail = b''
    while True:
        chunk = conn.recv(1024)
        if not chunk or b'\r\n\r\n' in tail + chunk:
            break
        tail = (tail + chunk)[-3:]

    # web-browsers insist on auto-downloads when given wave or flac audio
    if mime_type in ('audio/x-wav', 'audio/x-flac'):
        handle_sound_workaround(conn, mime_type, start, rest)
        return

    # web-browsers insist on auto-downloads when given bitmap pictures
    if mime_type == 'image/x-bmp':
        handle_image_workaround(conn, mime_type, start, rest)
        return

    # handle all other data formats
    o = conn.sendall

    o(b'HTTP/1.1 200 OK\r\n')
    o(bytes(f'Content-Type: {mime_type}\r\n', encoding='utf-8'))
    o(b'Content-Disposition: inline\r\n')
    # tell browser this is the only/last request
    o(b'Connection: close\r\n')
    # payload starts right after an empty line
    o(b'\r\n')

    # send all input bytes: the rest of the input is copied in chunks via
    # its own read method, instead of using socket.sendfile, which is only
    # meant for regular files, and which works on the underlying file
    # descriptor, ignoring bytes already buffered by the reader; reading
    # works the same way for pipes, regular files, and in-memory buffers
    o(start)
    while True:
        chunk = rest.read(64 * 1024)
        if not chunk:
            break
        o(chunk)


def handle_sound_workaround(conn, mime: str, start: bytes, rest) -> None:
    '''
    Show sound data using an audio player embedded in a web page.

    All input bytes are base64-encoded into the data-URI of an `audio` tag,
    which func handle_workaround wraps into a full HTML response.
    '''

    send = conn.sendall

    def emit_inner_body() -> None:
        opening = f'    <audio controls autofocus src="data:{mime};base64,'
        send(opening.encode('utf-8'))
        # the rest of the input is only read once the page is being sent
        send(b64encode(start + rest.read()))
        send(b'"></audio>\n')

    handle_workaround(conn, 'Wave-Audio Sound', emit_inner_body)


def handle_image_workaround(conn, mime: str, start: bytes, rest) -> None:
    '''
    Show picture data using an image embedded in a web page.

    All input bytes are base64-encoded into the data-URI of an `img` tag,
    which func handle_workaround wraps into a full HTML response.
    '''

    send = conn.sendall

    def emit_inner_body() -> None:
        opening = f'    <img src="data:{mime};base64,'
        send(opening.encode('utf-8'))
        # the rest of the input is only read once the page is being sent
        send(b64encode(start + rest.read()))
        send(b'">\n')

    handle_workaround(conn, 'Bitmap Picture', emit_inner_body)


def handle_workaround(conn, title: str, handle_inner_body: Callable) -> None:
    '''
    Avoid annoying auto-download web-browser behavior, by sending a whole
    web page as the HTTP response.

    The title given is used for the page, while the callback given is
    called with no arguments, when it's time to emit what goes inside the
    `body` tag: the callback is expected to send that content itself.
    '''

    send = conn.sendall

    before_title = (
        b'HTTP/1.1 200 OK\r\n',
        # the response is always a web page, whatever the input type is
        b'Content-Type: text/html; charset=UTF-8\r\n',
        # discourage web-browsers' download-dialogs and/or auto-downloads
        b'Content-Disposition: inline\r\n',
        # tell browser this is the last request
        b'Connection: close\r\n',
        # payload starts right after an empty line
        b'\r\n',
        # the HTML work-around starts here
        b'<!DOCTYPE html>\n',
        b'<html lang="en">\n',
        b'<head>\n',
        b'    <meta charset="UTF-8">\n',
        b'    <link rel="icon" href="data:,">\n',
        b'    <meta name="viewport"',
        b' content="width=device-width, initial-scale=1.0">\n',
    )

    after_title = (
        b'    <style>\n',
        b'        body { margin: auto; }\n',
        b'        audio { display: block; margin: auto; width: 90vw; }\n',
        b'        img { display: block; margin: auto; }\n',
        b'    </style>\n',
        b'</head>\n',
        b'<body>\n',
    )

    for piece in before_title:
        send(piece)
    send(bytes(f'    <title>{title}</title>\n', encoding='utf-8'))
    for piece in after_title:
        send(piece)

    handle_inner_body()

    for piece in (b'</body>\n', b'</html>\n'):
        send(piece)


# serve the data from standard input to the web-browser tab opened for it:
# both sockets are context-managed, so they're closed even when things fail
try:
    with socket() as sock:
        # opening socket on port 0 randomly picks an available port
        sock.bind(('localhost', 0))
        port = sock.getsockname()[1]
        # give up if no connection is accepted within 10 seconds
        sock.settimeout(10.0)
        # only handle one thing at a time, since it's a one-off server
        sock.listen(1)

        open_new_tab(f'http://localhost:{port}')

        # handle only a single request-response cycle
        conn, addr = sock.accept()
        with conn:
            # the first few bytes are read upfront, to guess the data-format
            show_it(conn, stdin.buffer.read(4096), stdin.buffer)
except Exception as e:
    # show the error message in red, and quit with a failure exit-code
    print(f'\x1b[31m{e}\x1b[0m', file=stderr)
    exit(1)