File: podfeed.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from datetime import datetime
  27 from html import escape
  28 from multiprocessing import Pool
  29 from sys import argv, exit, stderr, stdin
  30 from typing import Dict, List
  31 from urllib.parse import urlparse, urlunparse
  32 from urllib.request import urlopen
  33 from xml.dom.minidom import parse
  34 
  35 
  36 info = '''
  37 podfeed [options...] [filepaths/URIs...]
  38 
  39 
  40 PODcast FEED fetches all episodes from the feeds given as URIs, either as
  41 arguments, or as lines in the plain-text files given.
  42 
  43 The result is self-contained HTML which links to all episodes, and adds
  44 many little extras, such as tooltips showing date of publication and play
  45 duration.
  46 
  47 Podcast thumbnails aren't included as inline data-URIs, to avoid making
  48 the output size considerably bigger; they could easily source external
  49 URIs, but doing that would make the output no longer fully self-contained.
  50 
  51 All (optional) leading options start with either single or double-dash,
  52 and most of them change the style/color used. Some of the options are,
  53 shown in their single-dash form:
  54 
  55     -h          show this help message
  56     -help       show this help message
  57 
  58     -title      use the next argument as the title in the HTML output
  59 '''
  60 
  61 # a leading help-option arg means show the help message and quit
  62 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  63     print(info.strip(), file=stderr)
  64     exit(0)
  65 
  66 
  67 def fail(msg, code: int = 1) -> None:
  68     'Show the error message given, and quit the app right away.'
  69     print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
  70     exit(code)
  71 
  72 
  73 # handle leading cmd-line options
  74 title = ''
  75 start_args = 1
  76 while start_args < len(argv) and argv[start_args].startswith('-'):
  77     l = argv[start_args].lstrip('-').lower()
  78     if l in ('title'):
  79         if start_args + 1 >= len(argv):
  80             fail('missing actual title in cmd-line arguments', 1)
  81         title = argv[start_args + 1]
  82         start_args += 2
  83         continue
  84     break
  85 args = argv[start_args:]
  86 
  87 # use a default web-page title if one wasn't given
  88 if title == '':
  89     now = datetime.now()
  90     ymd = f'{now.year}-{now.month:02}-{now.day:02}'
  91     hms = f'{now.hour}:{now.minute:02}:{now.second:02}'
  92     title = f'Latest Podcast Episodes as of {ymd} {hms}'
  93 
  94 
  95 def parse_feed(uri: str) -> Dict:
  96     'Turn an XML feed into dictionaries, given the feed\'s URI.'
  97 
  98     res = {'rss': []}
  99     with urlopen(uri) as inp:
 100         feed = parse(inp)
 101         for rss in feed.getElementsByTagName('rss'):
 102             channels = rss.getElementsByTagName('channel')
 103             channels = [parse_channel(chan) for chan in channels]
 104             res['rss'].append({'channels': channels})
 105     return res
 106 
 107 
 108 def parse_channel(chan) -> Dict:
 109     'Help func parse_feed do its job.'
 110 
 111     title = get_str(chan, 'title')
 112     link = get_str(chan, 'link')
 113     descr = get_str(chan, 'description')
 114     # no channel thumbnail for now
 115 
 116     episodes = chan.getElementsByTagName('item')
 117     episodes = [parse_episode(ep) for ep in episodes]
 118 
 119     return {
 120         'title': title,
 121         'link': link,
 122         'description': descr,
 123         'episodes': episodes,
 124     }
 125 
 126 
 127 def parse_episode(episode) -> Dict:
 128     'Help func parse_channel do its job.'
 129 
 130     title = get_str(episode, 'title')
 131     link = get_str(episode, 'link')
 132     description = get_str(episode, 'description')
 133     pub_date = get_str(episode, 'pubDate')
 134     duration = get_str(episode, 'itunes:duration')
 135     for enc in episode.getElementsByTagName('enclosure'):
 136         link = enc.getAttribute('url')
 137 
 138     return {
 139         'title': title,
 140         'link': link,
 141         'description': description,
 142         'pub_date': pub_date,
 143         'duration': duration,
 144     }
 145 
 146 
 147 def render_feed(feed) -> None:
 148     'Handle a single parsed RSS feed.'
 149 
 150     indent = 12 * ' '
 151     print('        <article>')
 152 
 153     for rss in feed['rss']:
 154         for chan in rss['channels']:
 155             href = urlunparse(urlparse(chan['link']))
 156             title = escape(chan['title'])
 157             descr = escape(chan['description'])
 158             a = make_anchor(href, title)
 159             s = f'{indent}<h1><summary title="{descr}">{a}</summary></h1>'
 160             print(s)
 161             # no channel thumbnail for now
 162 
 163             for episode in chan['episodes']:
 164                 render_episode(episode)
 165 
 166     print('        </article>')
 167 
 168 
 169 def render_episode(episode) -> None:
 170     'Help func render_feed do its job.'
 171 
 172     title = escape(episode['title'])
 173     href = urlunparse(urlparse(episode['link']))
 174     description = escape(episode['description'])
 175     pub_date = escape(episode['pub_date'])
 176     duration = escape(episode['duration'])
 177     tt = make_tooltip(pub_date, duration)
 178     a = make_anchor(href, title)
 179 
 180     print('            <section>')
 181     print('                <details>')
 182     print(f'                    <summary title="{tt}">{a}</summary>')
 183     print(f'                    <p>{description}</p>')
 184     print('                </details>')
 185     print('            </section>')
 186 
 187 
 188 def make_anchor(href: str, title: str) -> str:
 189     'Standardize how hyperlinks are handled in this script.'
 190     return f'<a target="_blank" rel="noreferrer" href="{href}">{title}</a>'
 191 
 192 
 193 def make_tooltip(pub_date: str, duration: str) -> str:
 194     try:
 195         # because datetime's supposedly-idiomatic solutions are so ugly
 196         s = int(duration)
 197         h = int(s / 3600)
 198         m = int(s / 60) % 3600
 199         s %= 60
 200         hms = f'{h:02}:{m:02}:{s:02}'.lstrip('00:')
 201     except Exception:
 202         hms = duration
 203     return f'published: {pub_date} | duration: {hms}'
 204 
 205 
 206 def get_str(src, tag: str) -> str:
 207     'Simplify the control-flow of various feed-parsing funcs.'
 208 
 209     try:
 210         res = src.getElementsByTagName(tag)
 211         if len(res) == 0:
 212             return ''
 213         for e in res[0].childNodes:
 214             if e.nodeType in (e.TEXT_NODE, e.CDATA_SECTION_NODE):
 215                 return e.data.strip()
 216         return ''
 217     except Exception:
 218         return ''
 219 
 220 
 221 def get_uris(src) -> List[str]:
 222     'This func helps func load_feed_uris load all URIs from a file.'
 223 
 224     uris = []
 225     for line in src:
 226         line = line.rstrip('\r\n').rstrip('\n').strip()
 227         if line == '' or line.startswith('#'):
 228             continue
 229         uris.append(line)
 230     return uris
 231 
 232 
 233 def load_feed_uris(args: List[str]) -> List[str]:
 234     'Turn a mix of URIs and filepaths into a list of URIs to load.'
 235 
 236     if args.count('-') > 1:
 237         msg = 'reading from `-` (standard input) more than once not allowed'
 238         raise ValueError(msg)
 239 
 240     if len(args) == 0:
 241         return get_uris(stdin)
 242 
 243     uris = []
 244 
 245     for path in args:
 246         if path.startswith('https://') or path.startswith('http://'):
 247             uris.append(path)
 248             continue
 249 
 250         if path == '-':
 251             uris.extend(get_uris(stdin))
 252 
 253         with open(path, encoding='utf-8') as inp:
 254             uris.extend(get_uris(inp))
 255 
 256     return uris
 257 
 258 
# style is the `inner` CSS used inside the style tag: it's indented to line
# up with the rest of the HTML output, since it's printed as is between the
# opening and closing style tags; only leading/trailing line-feeds are
# stripped, to keep the indentation of its first line
style = '''
        body {
            font-size: 0.9rem;
            margin: 0 0 2rem 0;
            font-family: system-ui, -apple-system, sans-serif;
        }

        main {
            margin: auto;
            display: flex;
            width: fit-content;
        }

        h1 {
            top: 0;
            position: sticky;
            font-size: 0.9rem;
            text-align: center;
            background-color: white;
        }

        img {
            margin: auto;
            margin-bottom: 1rem;
            display: block;
            max-width: 15ch;
        }

        section {
            width: 48ch;
            padding: 0.3rem;
            margin: 0 0.1rem;
        }

        section:nth-child(2n+1) {
            background-color: #eee;
        }

        a {
            color: steelblue;
            text-decoration: none;
        }

        details p {
            line-height: 1.3rem;
        }
'''.strip('\n')
 307 
 308 
 309 try:
 310     feeds = load_feed_uris(args)
 311 
 312     print('<!DOCTYPE html>')
 313     print('<html lang="en">')
 314     print('<head>')
 315     print('    <meta charset="UTF-8">')
 316     print('    <link rel="icon" href="data:,">')
 317     cattr = 'content="width=device-width, initial-scale=1.0"'
 318     print(f'    <meta name="viewport" {cattr}>')
 319     if title != '':
 320         print(f'    <title>{escape(title)}</title>')
 321     print('    <style>')
 322     print(style)
 323     print('    </style>')
 324     print('</head>')
 325     print('<body>')
 326     print('    <main>')
 327 
 328     # significantly speed-up script by loading/parsing feeds concurrently
 329     with Pool(processes=min(4, len(feeds))) as pool:
 330         feeds = pool.map(parse_feed, feeds)
 331 
 332     for feed in feeds:
 333         render_feed(feed)
 334 
 335     print('    </main>')
 336     print('</body>')
 337     print('</html>')
 338 except BrokenPipeError:
 339     # quit quietly, instead of showing a confusing error message
 340     stderr.close()
 341 except KeyboardInterrupt:
 342     exit(2)
 343 except Exception as e:
 344     fail(e, 1)