#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from datetime import datetime
from html import escape
from multiprocessing import Pool
from sys import argv, exit, stderr, stdin
from typing import Dict, List, Tuple
from urllib.parse import urlparse, urlunparse
from urllib.request import urlopen
from xml.dom.minidom import parse


info = '''
podfeed [options...] [filepaths/URIs...]


PODcast FEED fetches all episodes from the feeds given as URIs, either as
arguments, or as lines in the plain-text files given.

The result is self-contained HTML which links to all episodes, and adds many
little extras, such as tooltips showing date of publication and play duration.

Podcast thumbnails aren't included as inline data-URIs, to avoid making the
output size considerably bigger; they could easily source external URIs, but
doing that would make the output no longer fully self-contained.

All (optional) leading options start with either single or double-dash,
and most of them change the style/color used. Some of the options are,
shown in their single-dash form:

    -h          show this help message
    -help       show this help message

    -title      use the next argument as the title in the HTML output
'''


def fail(msg, code: int = 1) -> None:
    'Show the error message given, and quit the app right away.'
    # the ANSI codes make the message show up in red on terminals
    print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
    exit(code)


def split_options(cli: List[str]) -> Tuple[str, List[str]]:
    '''
    Handle the leading cmd-line options.

    The argument is the full argument vector, program name included. The
    result is the title given via `-title`/`--title` (empty when absent),
    followed by all remaining arguments, which are the filepaths/URIs.

    Raises ValueError when the title option lacks its value.
    '''

    title = ''
    pos = 1
    while pos < len(cli) and cli[pos].startswith('-'):
        name = cli[pos].lstrip('-').lower()
        # an exact comparison is needed here: a substring check would also
        # accept `-`, `-t`, `-it`, etc., and a lone `-` means standard input
        if name != 'title':
            break
        if pos + 1 >= len(cli):
            raise ValueError('missing actual title in cmd-line arguments')
        title = cli[pos + 1]
        pos += 2
    return title, cli[pos:]


def default_title() -> str:
    'Make the web-page title used when none was given explicitly.'
    now = datetime.now()
    ymd = f'{now.year}-{now.month:02}-{now.day:02}'
    hms = f'{now.hour}:{now.minute:02}:{now.second:02}'
    return f'Latest Podcast Episodes as of {ymd} {hms}'


def parse_feed(uri: str) -> Dict:
    '''
    Turn an XML feed into dictionaries, given the feed's URI.

    The result has the shape {'rss': [{'channels': [...]}, ...]}, where each
    channel is what func parse_channel returns. This func is kept at the top
    level of the script so worker processes can find it by name.
    '''

    res = {'rss': []}
    with urlopen(uri) as inp:
        feed = parse(inp)

    for rss in feed.getElementsByTagName('rss'):
        channels = rss.getElementsByTagName('channel')
        channels = [parse_channel(chan) for chan in channels]
        res['rss'].append({'channels': channels})
    return res


def parse_channel(chan) -> Dict:
    'Help func parse_feed do its job: `chan` is a channel DOM element.'

    title = get_str(chan, 'title')
    link = get_str(chan, 'link')
    descr = get_str(chan, 'description')
    # no channel thumbnail for now
    episodes = chan.getElementsByTagName('item')
    episodes = [parse_episode(ep) for ep in episodes]

    return {
        'title': title,
        'link': link,
        'description': descr,
        'episodes': episodes,
    }


def parse_episode(episode) -> Dict:
    'Help func parse_channel do its job: `episode` is an item DOM element.'

    title = get_str(episode, 'title')
    link = get_str(episode, 'link')
    description = get_str(episode, 'description')
    pub_date = get_str(episode, 'pubDate')
    duration = get_str(episode, 'itunes:duration')

    # prefer linking to the media file itself, when the episode has one;
    # enclosures lacking a URL are ignored, so the link tag stays as fallback
    for enc in episode.getElementsByTagName('enclosure'):
        url = enc.getAttribute('url')
        if url:
            link = url

    return {
        'title': title,
        'link': link,
        'description': description,
        'pub_date': pub_date,
        'duration': duration,
    }


def render_feed(feed) -> None:
    'Handle a single parsed RSS feed, emitting its HTML on standard output.'

    indent = 12 * ' '
    print('        <section>')
    for rss in feed['rss']:
        for chan in rss['channels']:
            href = urlunparse(urlparse(chan['link']))
            title = escape(chan['title'])
            descr = escape(chan['description'])
            a = make_anchor(href, title)
            # the channel's description shows up as a tooltip
            print(f'{indent}<h1 title="{descr}">{a}</h1>')
            # no channel thumbnail for now
            for episode in chan['episodes']:
                render_episode(episode)
    print('        </section>')


def render_episode(episode) -> None:
    'Help func render_feed do its job.'

    title = escape(episode['title'])
    href = urlunparse(urlparse(episode['link']))
    description = escape(episode['description'])
    pub_date = escape(episode['pub_date'])
    duration = escape(episode['duration'])
    tt = make_tooltip(pub_date, duration)
    a = make_anchor(href, title)

    print('            <div>')
    print('                <details>')
    print(f'                    <summary title="{tt}">{a}</summary>')
    print(f'                    <p>{description}</p>')
    print('                </details>')
    print('            </div>')


def make_anchor(href: str, title: str) -> str:
    '''
    Standardize how hyperlinks are handled in this script.

    The title is expected to be HTML-escaped already by the caller, while
    the link is escaped here, since it comes straight from the feed and may
    have quotes/ampersands which would otherwise break the attribute.
    '''
    return f'<a href="{escape(href)}">{title}</a>'


def make_tooltip(pub_date: str, duration: str) -> str:
    '''
    Make the tooltip text for an episode.

    Durations given as a plain count of seconds are shown as h:mm:ss, or as
    m:ss when under an hour; anything else (empty, already in hh:mm:ss form,
    negative, etc.) is shown as given.
    '''

    try:
        total = int(duration)
    except (TypeError, ValueError):
        hms = duration
    else:
        if total < 0:
            hms = duration
        else:
            minutes, seconds = divmod(total, 60)
            hours, minutes = divmod(minutes, 60)
            if hours > 0:
                hms = f'{hours}:{minutes:02}:{seconds:02}'
            else:
                hms = f'{minutes}:{seconds:02}'
    return f'published: {pub_date} | duration: {hms}'


def get_str(src, tag: str) -> str:
    '''
    Simplify the control-flow of various feed-parsing funcs.

    The result is the stripped text of the first element with the tag-name
    given found anywhere under `src`, or an empty string when there's none,
    or when anything goes wrong.
    '''

    try:
        res = src.getElementsByTagName(tag)
        if len(res) == 0:
            return ''
        # join all text pieces, since CDATA sections are often surrounded
        # by whitespace-only text nodes, which would otherwise `win`
        kinds = (res[0].TEXT_NODE, res[0].CDATA_SECTION_NODE)
        parts = [e.data for e in res[0].childNodes if e.nodeType in kinds]
        return ''.join(parts).strip()
    except Exception:
        # deliberately best-effort: missing info just shows up empty
        return ''


def get_uris(src) -> List[str]:
    '''
    This func helps func load_feed_uris load all URIs from a file.

    The argument is any iterable of text lines: empty(ish) lines and lines
    starting with `#` are ignored.
    '''

    uris = []
    for line in src:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        uris.append(line)
    return uris


def load_feed_uris(args: List[str]) -> List[str]:
    '''
    Turn a mix of URIs and filepaths into a list of URIs to load.

    Arguments starting with `http://` or `https://` are used as given, a
    dash stands for standard input, and anything else is the path of a
    plain-text file with a URI on each line. Standard input is also used
    when no arguments are given.

    Raises ValueError when the dash is given more than once, and OSError
    when a file can't be opened.
    '''

    if args.count('-') > 1:
        msg = 'reading from `-` (standard input) more than once not allowed'
        raise ValueError(msg)

    if len(args) == 0:
        return get_uris(stdin)

    uris = []
    for path in args:
        if path.startswith(('https://', 'http://')):
            uris.append(path)
            continue

        if path == '-':
            uris.extend(get_uris(stdin))
            # avoid falling through and trying to open a file named `-`
            continue

        with open(path, encoding='utf-8') as inp:
            uris.extend(get_uris(inp))
    return uris


# style is the `inner` CSS used inside the style tag
style = '''
        body {
            font-size: 0.9rem;
            margin: 0 0 2rem 0;
            font-family: system-ui, -apple-system, sans-serif;
        }

        main {
            margin: auto;
            display: flex;
            width: fit-content;
        }

        h1 {
            top: 0;
            position: sticky;
            font-size: 0.9rem;
            text-align: center;
            background-color: white;
        }

        img {
            margin: auto;
            margin-bottom: 1rem;
            display: block;
            max-width: 15ch;
        }

        section {
            width: 48ch;
            padding: 0.3rem;
            margin: 0 0.1rem;
        }

        section:nth-child(2n+1) {
            background-color: #eee;
        }

        a {
            color: steelblue;
            text-decoration: none;
        }

        details p {
            line-height: 1.3rem;
        }
'''.strip('\n')


def main() -> None:
    'Run the whole script: parse options, fetch all feeds, emit the HTML.'

    # a leading help-option arg means show the help message and quit
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    try:
        title, args = split_options(argv)
        # use a default web-page title if one wasn't given
        if title == '':
            title = default_title()

        feeds = load_feed_uris(args)

        print('<!DOCTYPE html>')
        print('<html lang="en">')
        print('<head>')
        print('    <meta charset="UTF-8">')
        print('    <link rel="icon" href="data:,">')
        cattr = 'content="width=device-width, initial-scale=1.0"'
        print(f'    <meta name="viewport" {cattr}>')
        if title != '':
            print(f'    <title>{escape(title)}</title>')
        print('    <style>')
        print(style)
        print('    </style>')
        print('</head>')
        print('<body>')
        print('    <main>')

        # significantly speed-up script by loading/parsing feeds concurrently;
        # process pools can't be empty, so no feeds means no pool at all
        parsed = []
        if len(feeds) > 0:
            with Pool(processes=min(4, len(feeds))) as pool:
                parsed = pool.map(parse_feed, feeds)
        for feed in parsed:
            render_feed(feed)

        print('    </main>')
        print('</body>')
        print('</html>')
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        fail(e, 1)


# the guard is needed because worker processes may re-import this script,
# which must not make them run the whole script all over again
if __name__ == '__main__':
    main()