#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from html import escape
from io import StringIO, TextIOWrapper
from re import compile
from sys import argv, exit, stderr, stdin
from urllib.parse import urlparse, urlunparse


info = '''
htmlify [options...] [filepaths/URIs...]


Render plain-text prose into self-contained HTML. Lines which are just a
valid data-URI are turned into pictures, audio, or even video elements.

All HTTP(s) URIs are autodetected and rendered as hyperlinks, even when
lines have multiple URIs in them.

If a title isn't given from the cmd-line options, the first line is used
as the title.

All (optional) leading options start with either single or double-dash,
and most of them change the style/color used. Some of the options are,
shown in their single-dash form:

    -h          show this help message
    -help       show this help message

    -title      use the next argument as the title in the HTML output
'''


# links is used in func handle_normal_text_line to handle hyperlinks; it's
# a raw string, so the regex never depends on invalid string escapes
links = compile(r'(https?|ftps?)://[a-zA-Z0-9_%.,?/&=-]+')

# style is the `inner` CSS used inside the style tag, and handles all
# visual styles for all supported input types
style = '''
    body {
        margin: 1rem auto 2rem auto;
        padding: 0.25rem;
        font-size: 1.1rem;
        line-height: 1.8rem;
        font-family: Arial, Helvetica, sans-serif;

        max-width: 95vw;
        /* width: max-content; */
        width: fit-content;

        box-sizing: border-box;
        display: block;
    }

    a {
        color: steelblue;
        text-decoration: none;
    }

    p {
        display: block;
        margin: auto;
        width: 80ch;
    }

    audio {
        width: 60ch;
    }

    table {
        margin: 2rem auto;
        border-collapse: collapse;
    }

    thead>* {
        position: sticky;
        top: 0;
        background-color: white;
    }

    tfoot th {
        user-select: none;
    }

    th, td {
        padding: 0.1rem 1ch;
        min-width: 4ch;
        border-bottom: solid thin transparent;
    }

    tr:nth-child(5n) td {
        border-bottom: solid thin #ccc;
    }

    .monospace {
        font-family: monospace;
    }
'''.strip('\n')


def fail(msg, code: int = 1) -> None:
    'Show the error message given, and quit the app right away.'
    print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
    exit(code)


def parse_args(cli_args) -> tuple:
    '''
    Split the cmd-line arguments into the title and the input names.

    The first item of `cli_args` is the program name and is ignored. Leading
    options are consumed until the first argument which isn't a recognized
    option; everything from there on is returned as the list of inputs.

    Returns the pair (title, inputs): the title is returned unescaped, since
    func start_page escapes it when emitting it. Quits the app via func fail
    when the title option has no value after it.
    '''

    title = ''
    pos = 1

    while pos < len(cli_args) and cli_args[pos].startswith('-'):
        if cli_args[pos] not in ('-title', '--title'):
            # anything else, including a lone dash, starts the inputs
            break
        if pos + 1 >= len(cli_args):
            fail('missing actual title in cmd-line arguments', 1)
        title = cli_args[pos + 1]
        pos += 2

    return title, list(cli_args[pos:])


def is_base64(n: int) -> bool:
    'Help build base64-byte-checker lookup tables.'

    if ord('0') <= n <= ord('9'):
        return True
    if ord('A') <= n <= ord('Z'):
        return True
    if ord('a') <= n <= ord('z'):
        return True
    return n in (ord('+'), ord('/'), ord('='))


# valid_base64 helps func seems_supported_data_uri do its job quickly
valid_base64 = tuple(is_base64(n) for n in range(256))


def start_page(title: str) -> None:
    'Emit the start of the HTML page, up to the opening of its body.'

    print('<!DOCTYPE html>')
    print('<html lang="en">')
    print('<head>')
    print('    <meta charset="UTF-8">')
    print('    <link rel="icon" href="data:,">')
    cattr = 'content="width=device-width, initial-scale=1.0"'
    print(f'    <meta name="viewport" {cattr}>')
    if title:
        # the title is escaped only here, so callers must pass it raw
        print(f'    <title>{escape(title)}</title>')
    print('    <style>')
    print(style)
    print('    </style>')
    print('</head>')
    print('<body>')


def shorten(s: str, maxchars: int) -> str:
    'Limit a string to its first few characters, when it is too long.'
    return s[:maxchars]


def handle_text(src, title: str, first: bool) -> None:
    '''
    Render plain-text prose.

    `src` is any iterable of text lines. When `first` is true, the first
    non-empty line also starts the HTML page: the title given is used for it
    or, when that's empty, the line itself becomes the title and isn't
    rendered as part of the body.
    '''

    # prev is the previous rendered line: a paragraph is open exactly when
    # it's not empty
    prev = ''
    # buf is a reusable string-buffer for func handle_normal_text_line
    buf = StringIO()
    num_lines = 0

    for line in src:
        # stripping all trailing whitespace also gets rid of line-feeds and
        # carriage-returns
        line = line.rstrip()

        if not (prev or line):
            # keep skipping empty(ish) lines in runs of such lines
            continue

        num_lines += 1
        if first and num_lines == 1:
            if title:
                start_page(shorten(title, 100))
            else:
                # the line is used up as the page title
                start_page(shorten(line, 100))
                continue

        if (not line) and prev:
            print('</p>')
        if not prev:
            print('<p>')
        prev = line

        if seems_supported_data_uri(line):
            handle_data_uri(line)
            print('<br>')
        else:
            handle_normal_text_line(line, buf)

    # don't forget to close last paragraph; checking prev (not the last line
    # read) avoids stray closing tags when no paragraph is currently open,
    # and also works when the input has no lines at all
    if prev:
        print('</p>')


def handle_normal_text_line(line: str, buf: StringIO) -> None:
    'Handle prose lines for func handle_text.'

    # get rid of previous buffer content
    buf.seek(0)
    buf.truncate(0)

    # end keeps track of where the latest detected hyperlink ended, so the
    # plain text between/after hyperlinks can be emitted
    end = 0

    for m in links.finditer(line):
        # emit the part before the match, then the match as an anchor tag
        buf.write(escape(line[end:m.start()]))
        buf.write(anchorize(m.group(0)))
        end = m.end()

    # don't forget the last part of the line, or the whole line when
    # there were no hyperlinks in it
    buf.write(escape(line[end:]))
    buf.write('<br>')
    print(buf.getvalue())


# NOTE(review): the values of data_uri_starts, and the bodies of funcs
# seems_supported_data_uri and handle_data_uri were rebuilt from their
# call sites and from the comments around them; confirm the exact markup
# and checks wanted

# data_uri_starts maps each supported data-URI prefix to the HTML template
# used to show it: the escaped URI replaces the `{}` placeholder
data_uri_starts = {
    'data:image/': '<img src="{}">',
    'data:audio/': '<audio controls src="{}"></audio>',
    'data:video/': '<video controls src="{}"></video>',
}


def seems_supported_data_uri(s: str) -> bool:
    '''
    Check if a whole line looks like a base64 data-URI for an image, sound,
    or video.
    '''

    if not s.startswith(tuple(data_uri_starts)):
        return False

    _, marker, payload = s.partition(';base64,')
    if not (marker and payload):
        return False

    try:
        data = payload.encode('ascii')
    except UnicodeEncodeError:
        return False
    # iterating over bytes gives the integers the lookup table is indexed by
    return all(map(valid_base64.__getitem__, data))


def handle_data_uri(s: str) -> bool:
    '''
    Emit the HTML element for the data-URI given, without ending the line.
    Returns whether the data-URI was of a supported type, and thus whether
    anything was emitted.
    '''

    for start, template in data_uri_starts.items():
        if s.startswith(start):
            # escaping keeps quotes from ending the src attribute early
            print(template.format(escape(s)), end='')
            return True
    return False


def anchorize(href: str) -> str:
    'Turn a URI into the HTML for a hyperlink showing that same URI.'

    rel = 'rel="noopener noreferrer"'
    # the URI is escaped for both the attribute and the text shown, since
    # detected hyperlinks can have ampersands in them
    safe = escape(href)
    return f'<a {rel} href="{safe}">{safe}</a>'


def seems_url(s: str) -> bool:
    'Check if an input name is a URI, instead of a filepath.'
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return s.startswith(protocols)


def main() -> None:
    'Run the app, using the cmd-line arguments.'

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)

    title, args = parse_args(argv)

    if args.count('-') > 1:
        msg = 'reading from `-` (standard input) more than once not allowed'
        fail(msg, 1)

    if any(seems_url(e) for e in args):
        from urllib.request import urlopen

    try:
        for n, path in enumerate(args):
            # only the first input starts the page: nothing in this loop
            # is allowed to change n
            first = n == 0

            if path == '-':
                handle_text(stdin, title, first)
                continue

            if seems_url(path):
                with urlopen(path) as inp:
                    # try to detect response encoding, if given; the
                    # headers are available for all supported protocols
                    enc = inp.headers.get_content_charset() or 'utf-8'
                    with TextIOWrapper(inp, encoding=enc) as txt:
                        handle_text(txt, title, first)
                continue

            with open(path, encoding='utf-8') as inp:
                handle_text(inp, title, first)

        if not args:
            handle_text(stdin, title, True)

        print('</body>')
        print('</html>')
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        fail(e, 1)


if __name__ == '__main__':
    main()