File: podfeed.py
   1 #!/usr/bin/python
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright (c) 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from datetime import datetime
  27 from html import escape
  28 from multiprocessing import Pool
  29 from sys import argv, exit, stderr, stdin
  30 from typing import Dict, List
  31 from urllib.parse import urlparse, urlunparse
  32 from urllib.request import urlopen
  33 from xml.dom.minidom import parse
  34 
  35 
  36 info = '''
  37 podfeed [options...] [filepaths/URIs...]
  38 
  39 
  40 PODcast FEED fetches all episodes from the feeds given as URIs, either as
  41 arguments, or as lines in the plain-text files given.
  42 
  43 The result is self-contained HTML which links to all episodes, and adds
  44 many little extras, such as tooltips showing date of publication and play
  45 duration.
  46 
  47 Podcast thumbnails aren't included as inline data-URIs, to avoid making
  48 the output size considerably bigger; they could easily source external
  49 URIs, but doing that would make the output no longer fully self-contained.
  50 
  51 All (optional) leading options start with either single or double-dash,
  52 and most of them change the style/color used. Some of the options are,
  53 shown in their single-dash form:
  54 
  55     -h, -help    show this help message
  56 
  57     -title      use the next argument as the title in the HTML output
  58 '''
  59 
  60 # a leading help-option arg means show the help message and quit
  61 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  62     print(info.strip())
  63     exit(0)
  64 
  65 
  66 def fail(msg, code: int = 1) -> None:
  67     'Show the error message given, and quit the app right away.'
  68     print(str(msg), file=stderr)
  69     exit(code)
  70 
  71 
  72 # handle leading cmd-line options
  73 title = ''
  74 start_args = 1
  75 while start_args < len(argv) and argv[start_args].startswith('-'):
  76     l = argv[start_args].lstrip('-').lower()
  77     if l in ('title'):
  78         if start_args + 1 >= len(argv):
  79             fail('missing actual title in cmd-line arguments', 1)
  80         title = argv[start_args + 1]
  81         start_args += 2
  82         continue
  83     break
  84 args = argv[start_args:]
  85 
  86 # use a default web-page title if one wasn't given
  87 if title == '':
  88     now = datetime.now()
  89     ymd = f'{now.year}-{now.month:02}-{now.day:02}'
  90     hms = f'{now.hour}:{now.minute:02}:{now.second:02}'
  91     title = f'Latest Podcast Episodes as of {ymd} {hms}'
  92 
  93 
  94 def parse_feed(uri: str) -> Dict:
  95     'Turn an XML feed into dictionaries, given the feed\'s URI.'
  96 
  97     res = {'rss': []}
  98     with urlopen(uri) as inp:
  99         feed = parse(inp)
 100         for rss in feed.getElementsByTagName('rss'):
 101             channels = rss.getElementsByTagName('channel')
 102             channels = [parse_channel(chan) for chan in channels]
 103             res['rss'].append({'channels': channels})
 104     return res
 105 
 106 
 107 def parse_channel(chan) -> Dict:
 108     'Help func parse_feed do its job.'
 109 
 110     title = get_str(chan, 'title')
 111     link = get_str(chan, 'link')
 112     descr = get_str(chan, 'description')
 113     # no channel thumbnail for now
 114 
 115     episodes = chan.getElementsByTagName('item')
 116     episodes = [parse_episode(ep) for ep in episodes]
 117 
 118     return {
 119         'title': title,
 120         'link': link,
 121         'description': descr,
 122         'episodes': episodes,
 123     }
 124 
 125 
 126 def parse_episode(episode) -> Dict:
 127     'Help func parse_channel do its job.'
 128 
 129     title = get_str(episode, 'title')
 130     link = get_str(episode, 'link')
 131     description = get_str(episode, 'description')
 132     pub_date = get_str(episode, 'pubDate')
 133     duration = get_str(episode, 'itunes:duration')
 134     for enc in episode.getElementsByTagName('enclosure'):
 135         link = enc.getAttribute('url')
 136 
 137     return {
 138         'title': title,
 139         'link': link,
 140         'description': description,
 141         'pub_date': pub_date,
 142         'duration': duration,
 143     }
 144 
 145 
 146 def render_feed(feed) -> None:
 147     'Handle a single parsed RSS feed.'
 148 
 149     indent = 12 * ' '
 150     print('        <article>')
 151 
 152     for rss in feed['rss']:
 153         for chan in rss['channels']:
 154             href = urlunparse(urlparse(chan['link']))
 155             title = escape(chan['title'])
 156             descr = escape(chan['description'])
 157             a = make_anchor(href, title)
 158             s = f'{indent}<h1><summary title="{descr}">{a}</summary></h1>'
 159             print(s)
 160             # no channel thumbnail for now
 161 
 162             for episode in chan['episodes']:
 163                 render_episode(episode)
 164 
 165     print('        </article>')
 166 
 167 
 168 def render_episode(episode) -> None:
 169     'Help func render_feed do its job.'
 170 
 171     title = escape(episode['title'])
 172     href = urlunparse(urlparse(episode['link']))
 173     description = escape(episode['description'])
 174     pub_date = escape(episode['pub_date'])
 175     duration = escape(episode['duration'])
 176     tt = make_tooltip(pub_date, duration)
 177     a = make_anchor(href, title)
 178 
 179     print('            <section>')
 180     print('                <details>')
 181     print(f'                    <summary title="{tt}">{a}</summary>')
 182     print(f'                    <p>{description}</p>')
 183     print('                </details>')
 184     print('            </section>')
 185 
 186 
 187 def make_anchor(href: str, title: str) -> str:
 188     'Standardize how hyperlinks are handled in this script.'
 189     return f'<a target="_blank" rel="noreferrer" href="{href}">{title}</a>'
 190 
 191 
 192 def make_tooltip(pub_date: str, duration: str) -> str:
 193     try:
 194         # because datetime's supposedly-idiomatic solutions are so ugly
 195         s = int(duration)
 196         h = int(s / 3600)
 197         m = int(s / 60) % 3600
 198         s %= 60
 199         hms = f'{h:02}:{m:02}:{s:02}'.lstrip('00:')
 200     except Exception:
 201         hms = duration
 202     return f'published: {pub_date} | duration: {hms}'
 203 
 204 
 205 def get_str(src, tag: str) -> str:
 206     'Simplify the control-flow of various feed-parsing funcs.'
 207 
 208     try:
 209         res = src.getElementsByTagName(tag)
 210         if len(res) == 0:
 211             return ''
 212         for e in res[0].childNodes:
 213             if e.nodeType in (e.TEXT_NODE, e.CDATA_SECTION_NODE):
 214                 return e.data.strip()
 215         return ''
 216     except Exception:
 217         return ''
 218 
 219 
 220 def get_uris(src) -> List[str]:
 221     'This func helps func load_feed_uris load all URIs from a file.'
 222 
 223     uris = []
 224     for line in src:
 225         line = line.rstrip('\r\n').rstrip('\n').strip()
 226         if line == '' or line.startswith('#'):
 227             continue
 228         uris.append(line)
 229     return uris
 230 
 231 
 232 def load_feed_uris(args: List[str]) -> List[str]:
 233     'Turn a mix of URIs and filepaths into a list of URIs to load.'
 234 
 235     if args.count('-') > 1:
 236         msg = 'reading from `-` (standard input) more than once not allowed'
 237         raise ValueError(msg)
 238 
 239     if len(args) == 0:
 240         return get_uris(stdin)
 241 
 242     uris = []
 243 
 244     for path in args:
 245         if path.startswith('https://') or path.startswith('http://'):
 246             uris.append(path)
 247             continue
 248 
 249         if path == '-':
 250             uris.extend(get_uris(stdin))
 251 
 252         with open(path, encoding='utf-8') as inp:
 253             uris.extend(get_uris(inp))
 254 
 255     return uris
 256 
 257 
# style is the `inner` CSS used inside the style tag: its lines are already
# indented to fit the HTML printed around it, and only the leading/trailing
# line-feeds are stripped, so the indentation of its first line is kept
style = '''
        body {
            font-size: 0.9rem;
            margin: 0 0 2rem 0;
            font-family: system-ui, -apple-system, sans-serif;
        }

        main {
            margin: auto;
            display: flex;
            width: fit-content;
        }

        h1 {
            top: 0;
            position: sticky;
            font-size: 0.9rem;
            text-align: center;
            background-color: white;
        }

        img {
            margin: auto;
            margin-bottom: 1rem;
            display: block;
            max-width: 15ch;
        }

        section {
            width: 48ch;
            padding: 0.3rem;
            margin: 0 0.1rem;
        }

        section:nth-child(2n+1) {
            background-color: #eee;
        }

        a {
            color: steelblue;
            text-decoration: none;
        }

        details p {
            line-height: 1.3rem;
        }
'''.strip('\n')
 306 
 307 
 308 try:
 309     feeds = load_feed_uris(args)
 310 
 311     print('<!DOCTYPE html>')
 312     print('<html lang="en">')
 313     print('<head>')
 314     print('    <meta charset="UTF-8">')
 315     print('    <link rel="icon" href="data:,">')
 316     cattr = 'content="width=device-width, initial-scale=1.0"'
 317     print(f'    <meta name="viewport" {cattr}>')
 318     if title != '':
 319         print(f'    <title>{escape(title)}</title>')
 320     print('    <style>')
 321     print(style)
 322     print('    </style>')
 323     print('</head>')
 324     print('<body>')
 325     print('    <main>')
 326 
 327     # significantly speed-up script by loading/parsing feeds concurrently
 328     with Pool(processes=min(4, len(feeds))) as pool:
 329         feeds = pool.map(parse_feed, feeds)
 330 
 331     for feed in feeds:
 332         render_feed(feed)
 333 
 334     print('    </main>')
 335     print('</body>')
 336     print('</html>')
 337 except BrokenPipeError:
 338     # quit quietly, instead of showing a confusing error message
 339     stderr.close()
 340 except KeyboardInterrupt:
 341     exit(2)
 342 except Exception as e:
 343     fail(e, 1)