#!/usr/bin/python3 # The MIT License (MIT) # # Copyright © 2024 pacman64 # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from html import escape from io import StringIO, TextIOWrapper from re import compile from sys import argv, exit, stderr, stdin from urllib.parse import urlparse, urlunparse info = ''' htmlify [options...] [filepaths/URIs...] Render plain-text prose into self-contained HTML. Lines which are just a valid data-URI are turned into pictures, audio, or even video elements. All HTTP(s) URIs are autodetected and rendered as hyperlinks, even when lines have multiple URIs in them. If a title isn't given from the cmd-line options, the first line is used as the title. All (optional) leading options start with either single or double-dash, and most of them change the style/color used. Some of the options are, shown in their single-dash form: -h show this help message -help show this help message -title use the next argument as the title in the HTML output ''' # handle standard help cmd-line options, quitting right away in that case if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'): print(info.strip(), file=stderr) exit(0) # links is used in func handle_normal_text_line to handle hyperlinks links = compile('''(https?|ftps?)\://[a-zA-Z0-9_%.?/&=-]+''') # style is the `inner` CSS used inside the style tag, and handles all # visual styles for all supported input types style = ''' body { margin: 1rem auto 2rem auto; padding: 0.25rem; font-size: 1.1rem; line-height: 1.8rem; font-family: Arial, Helvetica, sans-serif; max-width: 95vw; /* width: max-content; */ width: fit-content; box-sizing: border-box; display: block; } a { color: steelblue; text-decoration: none; } p { display: block; margin: auto; width: 80ch; } audio { width: 60ch; } table { margin: 2rem auto; border-collapse: collapse; } thead>* { position: sticky; top: 0; background-color: white; } tfoot th { user-select: none; } th, td { padding: 0.1rem 1ch; min-width: 4ch; border-bottom: solid thin transparent; } tr:nth-child(5n) td { border-bottom: solid thin #ccc; } .monospace { font-family: monospace; } '''.strip('\n') def fail(msg, code: int = 1) -> None: 'Show the error message given, and quit the app right away.' print(f'\x1b[31m{msg}\x1b[0m', file=stderr) exit(code) title = '' start_args = 1 while start_args < len(argv) and argv[start_args].startswith('-'): if argv[start_args] in ('-title', '--title'): if start_args + 1 >= len(argv): fail('missing actual title in cmd-line arguments', 1) title = html_escape(argv[start_args + 1]) start_args += 2 continue break args = argv[start_args:] def is_base64(n: int) -> bool: 'Help build base64-byte-checker lookup tables.' if ord('0') <= n <= ord('9'): return True if ord('A') <= n <= ord('Z'): return True if ord('a') <= n <= ord('z'): return True return n in (ord('+'), ord('/'), ord('=')) # valid_base64 helps func seems_supported_data_uri do its job quickly valid_base64 = tuple(is_base64(n) for n in range(256)) def start_page(title: str) -> None: print('') print('') print('') print(' ') print(' ') cattr = 'content="width=device-width, initial-scale=1.0"' print(f' ') if title: print(f' {escape(title)}') print(' ') print('') print('') def html_escape(s: str) -> str: 'Safely escape generic plain-text.' s = s.replace('&', '&') s = s.replace('<', '<') s = s.replace('>', '>') return s def shorten(s: str, maxchars: int) -> str: return s if len(s) <= maxchars else s[:maxchars] def handle_text(src, title: str, first: bool) -> None: 'Render plain-text prose.' prev = '' # buf is a reusable string-buffer for func handle_normal_text_line buf = StringIO() for i, line in enumerate(src): line = line.rstrip('\r\n').rstrip('\n').rstrip() if not (prev or line): # keep skipping empty(ish) lines in runs of such lines continue line = html_escape(line) if first and i == 0: title = title if title else line start_page(shorten(title, 100)) if (not line) and prev: print('

') if not prev: print('

') prev = line if seems_supported_data_uri(line): handle_data_uri(line) print('
') else: handle_normal_text_line(line, buf) # don't forget to close last paragraph if line: print('

') def handle_normal_text_line(line: str, buf: StringIO) -> None: 'Handle prose lines for func handle_text.' # get rid of previous buffer content buf.truncate(0) buf.seek(0) # j keeps track of end of detected hyperlinks, and is used outside # the regex-match loop to detect trailing parts in lines j = 0 # matches is to keep track of whether any matches occurred matches = 0 for m in links.finditer(line): matches += 1 # remember previous index-end, used to emit the part before # the current match start = j i = m.start() j = m.end() # remember part before match buf.write(escape(line[start:i])) # replace matched hyperlink with an html anchor tag for it href = line[i:j] buf.write(f'{href}') if matches == 0: # avoid emptying lines with no matches print(f'{escape(line)}
') return # no need to copy the line when it's not changing anyway if j > 0: # don't forget the last part of the line, or the whole line buf.write(escape(line[j:])) buf.write('
') print(buf.getvalue()) def seems_supported_data_uri(s: str) -> bool: supported = ('data:image/','data:audio/', 'data:video/') if not any(s.startswith(e) for e in supported): return False base64_index = s.find(';base64,') if base64_index < 0: return False # check all payload bytes start = base64_index + len(';base64,') for i, c in enumerate(s): if i >= start and (not valid_base64[ord(c)]): return False return True def handle_data_uri(s: str) -> bool: if s.startswith('data:image/'): print('', end='') return True if s.startswith('data:audio/'): print('