#!/usr/bin/python

# The MIT License (MIT)
#
# Copyright (c) 2026 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from datetime import datetime
from html import escape
from multiprocessing import Pool
from sys import argv, exit, stderr, stdin
from typing import Dict, List
from urllib.parse import urlparse, urlunparse
from urllib.request import urlopen
from xml.dom.minidom import parse


info = '''
podfeed [options...] [filepaths/URIs...]


PODcast FEED fetches all episodes from the feeds given as URIs, either as
arguments, or as lines in the plain-text files given.

The result is self-contained HTML which links to all episodes, and adds
many little extras, such as tooltips showing date of publication and play
duration.

Podcast thumbnails aren't included as inline data-URIs, to avoid making
the output size considerably bigger; they could easily source external
URIs, but doing that would make the output no longer fully self-contained.

All (optional) leading options start with either single or double-dash,
and most of them change the style/color used. Some of the options are,
shown in their single-dash form:

    -h, -help      show this help message

    -title         use the next argument as the title in the HTML output
'''

# a leading help-option arg means show the help message and quit
if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info.strip())
    exit(0)


def fail(msg, code: int = 1) -> None:
    'Show the error message given, and quit the app right away.'
    print(str(msg), file=stderr)
    exit(code)


# handle leading cmd-line options
title = ''
start_args = 1
while start_args < len(argv) and argv[start_args].startswith('-'):
    l = argv[start_args].lstrip('-').lower()
    # FIX: the original tested `l in ('title')`, where ('title') is just the
    # string 'title', making this a substring check: options like `-t`, `-le`,
    # and even a bare `-` (which lstrips to '') all matched and consumed the
    # next argument; a 1-tuple restores an exact-name match
    if l in ('title',):
        if start_args + 1 >= len(argv):
            fail('missing actual title in cmd-line arguments', 1)
        title = argv[start_args + 1]
        start_args += 2
        continue
    # any other dash-led argument stops option-scanning
    break
args = argv[start_args:]

# use a default web-page title if one wasn't given
if title == '':
    now = datetime.now()
    ymd = f'{now.year}-{now.month:02}-{now.day:02}'
    hms = f'{now.hour}:{now.minute:02}:{now.second:02}'
    title = f'Latest Podcast Episodes as of {ymd} {hms}'


def parse_feed(uri: str) -> Dict:
    'Turn an XML feed into dictionaries, given the feed\'s URI.'

    res = {'rss': []}
    with urlopen(uri) as inp:
        feed = parse(inp)
        for rss in feed.getElementsByTagName('rss'):
            channels = rss.getElementsByTagName('channel')
            channels = [parse_channel(chan) for chan in channels]
            res['rss'].append({'channels': channels})
    return res


def parse_channel(chan) -> Dict:
    'Help func parse_feed do its job.'

    title = get_str(chan, 'title')
    link = get_str(chan, 'link')
    descr = get_str(chan, 'description')
    # no channel thumbnail for now

    episodes = chan.getElementsByTagName('item')
    episodes = [parse_episode(ep) for ep in episodes]

    return {
        'title': title,
        'link': link,
        'description': descr,
        'episodes': episodes,
    }


def parse_episode(episode) -> Dict:
    'Help func parse_channel do its job.'

    title = get_str(episode, 'title')
    link = get_str(episode, 'link')
    description = get_str(episode, 'description')
    pub_date = get_str(episode, 'pubDate')
    duration = get_str(episode, 'itunes:duration')
    # prefer the enclosure's URL (the actual media file) as the episode link
    for enc in episode.getElementsByTagName('enclosure'):
        link = enc.getAttribute('url')

    return {
        'title': title,
        'link': link,
        'description': description,
        'pub_date': pub_date,
        'duration': duration,
    }


def render_feed(feed) -> None:
    'Handle a single parsed RSS feed.'

    indent = 12 * ' '
    print('        <article>')

    for rss in feed['rss']:
        for chan in rss['channels']:
            # round-tripping via urlparse normalizes the channel's URI
            href = urlunparse(urlparse(chan['link']))
            title = escape(chan['title'])
            descr = escape(chan['description'])
            a = make_anchor(href, title)
            s = f'{indent}<h1><summary title="{descr}">{a}</summary></h1>'
            print(s)
            # no channel thumbnail for now

            for episode in chan['episodes']:
                render_episode(episode)

    print('        </article>')


def render_episode(episode) -> None:
    'Help func render_feed do its job.'

    title = escape(episode['title'])
    href = urlunparse(urlparse(episode['link']))
    description = escape(episode['description'])
    pub_date = escape(episode['pub_date'])
    duration = escape(episode['duration'])
    tt = make_tooltip(pub_date, duration)
    a = make_anchor(href, title)

    print('            <section>')
    print('                <details>')
    print(f'                    <summary title="{tt}">{a}</summary>')
    print(f'                    <p>{description}</p>')
    print('                </details>')
    print('            </section>')


def make_anchor(href: str, title: str) -> str:
    'Standardize how hyperlinks are handled in this script.'
    return f'<a target="_blank" rel="noreferrer" href="{href}">{title}</a>'


def make_tooltip(pub_date: str, duration: str) -> str:
    'Format an episode tooltip from its publication date and play duration.'
    try:
        # because datetime's supposedly-idiomatic solutions are so ugly
        s = int(duration)
        h = s // 3600
        # FIX: minutes were computed as `int(s / 60) % 3600`, which is wrong
        # for any duration of an hour or more (3700s showed as 01:61:40);
        # minutes wrap at 60, not 3600
        m = (s // 60) % 60
        s %= 60
        hms = f'{h:02}:{m:02}:{s:02}'
        # FIX: `.lstrip('00:')` strips the character-set {'0', ':'}, which
        # mangled times like 00:00:05 into just `5`; only drop one leading
        # `00:` (the hours part) when it's zero
        if hms.startswith('00:'):
            hms = hms[3:]
    except Exception:
        # duration isn't a plain number of seconds: show it as given
        hms = duration
    return f'published: {pub_date} | duration: {hms}'


def get_str(src, tag: str) -> str:
    'Simplify the control-flow of various feed-parsing funcs.'

    try:
        res = src.getElementsByTagName(tag)
        if len(res) == 0:
            return ''
        # return the first text/CDATA child of the first matching element
        for e in res[0].childNodes:
            if e.nodeType in (e.TEXT_NODE, e.CDATA_SECTION_NODE):
                return e.data.strip()
        return ''
    except Exception:
        return ''


def get_uris(src) -> List[str]:
    'This func helps func load_feed_uris load all URIs from a file.'

    uris = []
    for line in src:
        # skip blank lines and comment lines
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        uris.append(line)
    return uris


def load_feed_uris(args: List[str]) -> List[str]:
    'Turn a mix of URIs and filepaths into a list of URIs to load.'

    if args.count('-') > 1:
        msg = 'reading from `-` (standard input) more than once not allowed'
        raise ValueError(msg)

    if len(args) == 0:
        return get_uris(stdin)

    uris = []

    for path in args:
        if path.startswith('https://') or path.startswith('http://'):
            uris.append(path)
            continue

        if path == '-':
            uris.extend(get_uris(stdin))
            # FIX: the original fell through here and then tried to
            # open('-') as a regular file, which fails
            continue

        with open(path, encoding='utf-8') as inp:
            uris.extend(get_uris(inp))

    return uris


# style is the `inner` CSS used inside the style tag
style = '''
body {
    font-size: 0.9rem;
    margin: 0 0 2rem 0;
    font-family: system-ui, -apple-system, sans-serif;
}

main {
    margin: auto;
    display: flex;
    width: fit-content;
}

h1 {
    top: 0;
    position: sticky;
    font-size: 0.9rem;
    text-align: center;
    background-color: white;
}

img {
    margin: auto;
    margin-bottom: 1rem;
    display: block;
    max-width: 15ch;
}

section {
    width: 48ch;
    padding: 0.3rem;
    margin: 0 0.1rem;
}

section:nth-child(2n+1) {
    background-color: #eee;
}

a {
    color: steelblue;
    text-decoration: none;
}

details p {
    line-height: 1.3rem;
}
'''.strip('\n')


try:
    feeds = load_feed_uris(args)

    print('<!DOCTYPE html>')
    print('<html lang="en">')
    print('<head>')
    print('    <meta charset="UTF-8">')
    print('    <link rel="icon" href="data:,">')
    cattr = 'content="width=device-width, initial-scale=1.0"'
    print(f'    <meta name="viewport" {cattr}>')
    if title != '':
        print(f'    <title>{escape(title)}</title>')
    print('    <style>')
    print(style)
    print('    </style>')
    print('</head>')
    print('<body>')
    print('    <main>')

    # significantly speed-up script by loading/parsing feeds concurrently;
    # FIX: clamp the worker count to at least 1, since Pool(processes=0)
    # raises a ValueError when no feed URIs were given
    with Pool(processes=max(1, min(4, len(feeds)))) as pool:
        feeds = pool.map(parse_feed, feeds)

    for feed in feeds:
        render_feed(feed)

    print('    </main>')
    print('</body>')
    print('</html>')
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
except KeyboardInterrupt:
    exit(2)
except Exception as e:
    fail(e, 1)