#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# podfeed [options...] [filepaths/URIs...]
#
# PODcast FEED fetches all episodes from the feeds given as URIs, either as
# arguments, or as lines in the plain-text files given.
#
# The result is self-contained HTML which links to all episodes, and adds
# many little extras, such as tooltips showing date of publication and play
# duration.
#
# Podcast thumbnails are also included inline as data-URIs. Doing so can make
# output size considerably bigger: podcast logos already tend to come as big
# pictures, and their base-64 encoding further adds to their size, but using
# external URIs for them would no longer make the output self-contained.
#
# All (optional) leading options start with either single or double-dash,
# and most of them change the style/color used. Some of the options are,
# shown in their single-dash form:
#
#   -h          show this help message
#   -help       show this help message
#
#   -title      use the next argument as the title in the HTML output


from datetime import datetime
from html import escape
from multiprocessing import Pool
from sys import argv, exit, stderr, stdin, stdout
from typing import Dict, List, Tuple
from urllib.parse import urlparse, urlunparse
from urllib.request import urlopen
from xml.dom.minidom import parse


# info is the message shown when the leading argument is one of the standard
# cmd-line help options
info = '''
podfeed [options...] [filepaths/URIs...]

PODcast FEED fetches all episodes from the feeds given as URIs, either as
arguments, or as lines in the plain-text files given.

The result is self-contained HTML which links to all episodes, and adds
many little extras, such as tooltips showing date of publication and play
duration.

Podcast thumbnails are also included inline as data-URIs. Doing so can make
output size considerably bigger: podcast logos already tend to come as big
pictures, and their base-64 encoding further adds to their size, but using
external URIs for them would no longer make the output self-contained.

All (optional) leading options start with either single or double-dash,
and most of them change the style/color used. Some of the options are,
shown in their single-dash form:

    -h          show this help message
    -help       show this help message

    -title      use the next argument as the title in the HTML output
'''.strip()


def fail(msg, code: int = 1) -> None:
    '''Show the error message given, and quit the app right away.'''
    print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
    exit(code)


def parse_options(cli_args: List[str]) -> Tuple[str, List[str]]:
    '''
    Handle the leading cmd-line options.

    The first item of the list given is the script name, as in sys.argv.
    The result is the title chosen (empty when none was given), followed
    by all the arguments left after the leading options.

    Only the exact option name `title` is recognized: anything else which
    starts with a dash, including a lone `-` (the standard-input marker),
    ends option-handling and is left among the remaining arguments.
    '''

    title = ''
    start = 1
    while start < len(cli_args) and cli_args[start].startswith('-'):
        name = cli_args[start].lstrip('-').lower()
        # compare the whole name: a substring/`in` check against the string
        # 'title' would also match '-t', '-it', and even the empty name
        if name != 'title':
            break
        if start + 1 >= len(cli_args):
            fail('missing actual title in cmd-line arguments', 1)
        title = cli_args[start + 1]
        start += 2
    return title, cli_args[start:]


def default_title() -> str:
    '''Make the web-page title used when none is given explicitly.'''

    now = datetime.now()
    ymd = f'{now.year}-{now.month:02}-{now.day:02}'
    hms = f'{now.hour}:{now.minute:02}:{now.second:02}'
    return f'Latest Podcast Episodes as of {ymd} {hms}'


def parse_feed(uri: str, timeout: float = 30.0) -> Dict:
    '''
    Turn an XML feed into more convenient-to-use dictionaries,
    given the feed's URI.

    The timeout is given in seconds, and keeps an unresponsive server
    from blocking the whole script indefinitely.
    '''

    res = {'rss': []}
    with urlopen(uri, timeout=timeout) as inp:
        feed = parse(inp)
        for rss in feed.getElementsByTagName('rss'):
            channels = rss.getElementsByTagName('channel')
            channels = [parse_channel(chan) for chan in channels]
            res['rss'].append({'channels': channels})
    return res


def parse_channel(chan) -> Dict:
    '''Help func parse_feed do its job'''

    episodes = chan.getElementsByTagName('item')

    # no channel thumbnail for now
    return {
        'title': get_str(chan, 'title'),
        'link': get_str(chan, 'link'),
        'description': get_str(chan, 'description'),
        'episodes': [parse_episode(ep) for ep in episodes],
    }


def parse_episode(episode) -> Dict:
    '''Help func parse_channel do its job'''

    link = get_str(episode, 'link')
    # prefer linking straight to the media file, when one is available:
    # when there are several enclosures, the last non-empty one wins
    for enc in episode.getElementsByTagName('enclosure'):
        url = enc.getAttribute('url')
        if url:
            link = url

    return {
        'title': get_str(episode, 'title'),
        'link': link,
        'description': get_str(episode, 'description'),
        'pub_date': get_str(episode, 'pubDate'),
        'duration': get_str(episode, 'itunes:duration'),
    }


def render_feed(feed) -> None:
    '''Handle a single parsed RSS feed.'''

    indent = 12 * ' '
    print('        <article>')

    for rss in feed['rss']:
        for chan in rss['channels']:
            href = urlunparse(urlparse(chan['link']))
            title = escape(chan['title'])
            descr = escape(chan['description'])
            a = make_anchor(href, title)
            s = f'{indent}<h1><summary title="{descr}">{a}</summary></h1>'
            print(s)
            # no channel thumbnail for now

            for episode in chan['episodes']:
                render_episode(episode)

    print('        </article>')


def render_episode(episode) -> None:
    '''Help func render_feed do its job.'''

    title = escape(episode['title'])
    href = urlunparse(urlparse(episode['link']))
    description = escape(episode['description'])
    pub_date = escape(episode['pub_date'])
    duration = escape(episode['duration'])
    tt = make_tooltip(pub_date, duration)
    a = make_anchor(href, title)

    print('            <section>')
    print('                <details>')
    print(f'                    <summary title="{tt}">{a}</summary>')
    print(f'                    <p>{description}</p>')
    print('                </details>')
    print('            </section>')


def make_anchor(href: str, title: str) -> str:
    '''
    Standardize how hyperlinks are handled in this script.

    The href is expected unescaped, and is escaped here, since it comes
    from feed data and ends up inside a double-quoted HTML attribute; the
    title is expected to be already HTML-escaped by the caller.
    '''
    safe = escape(href, quote=True)
    return f'<a target="_blank" rel="noreferrer" href="{safe}">{title}</a>'


def make_tooltip(pub_date: str, duration: str) -> str:
    '''
    Make the tooltip text for an episode.

    A duration given as a (non-negative) number of seconds is shown as
    either MM:SS or HH:MM:SS, the latter only when it lasts at least an
    hour; anything else (already-formatted, or empty) is shown as given.
    '''

    try:
        total = int(duration)
    except (TypeError, ValueError):
        total = -1

    if total < 0:
        hms = duration
    else:
        minutes, s = divmod(total, 60)
        h, m = divmod(minutes, 60)
        hms = f'{h:02}:{m:02}:{s:02}' if h > 0 else f'{m:02}:{s:02}'
    return f'published: {pub_date} | duration: {hms}'


def get_str(src, tag: str) -> str:
    '''
    Simplify the control-flow of various feed-parsing funcs.

    The result is the stripped text of the first element with the tag
    given, or an empty string when there's no such element. All text and
    CDATA children are joined, so whitespace before a CDATA section doesn't
    hide the actual content.
    '''

    try:
        # note this looks at all descendants of src, not just its children
        found = src.getElementsByTagName(tag)
    except (AttributeError, TypeError):
        return ''

    if not found:
        return ''

    parts = [
        node.data for node in found[0].childNodes
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
    ]
    return ''.join(parts).strip()


def get_uris(src) -> List[str]:
    '''
    This func helps func load_feed_uris load all URIs from a file.

    Empty(ish) lines and lines starting with `#` are ignored; src can be
    anything which loops over text lines.
    '''

    uris = []
    for line in src:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        uris.append(line)
    return uris


def load_feed_uris(args: List[str]) -> List[str]:
    '''
    Turn a mix of URIs and filepaths into a list of URIs to load.

    No arguments means read URIs from standard input, which can also be
    asked for explicitly (at most once) using a single dash. A ValueError
    is raised when the dash is given more than once.
    '''

    if len(args) == 0:
        return get_uris(stdin)

    if args.count('-') > 1:
        msg = 'reading from `-` (standard input) more than once not allowed'
        raise ValueError(msg)

    uris = []
    for path in args:
        if path.startswith(('https://', 'http://')):
            uris.append(path)
            continue

        if path == '-':
            uris.extend(get_uris(stdin))
            # don't fall through and try to open a file named `-`
            continue

        with open(path, encoding='utf-8') as inp:
            uris.extend(get_uris(inp))

    return uris


# style is the `inner` CSS used inside the style tag, and handles all
# visual styles for all supported input types
style = '''
        body {
            font-size: 0.9rem;
            margin: 0 0 2rem 0;
            font-family: system-ui, -apple-system, sans-serif;
        }

        main {
            margin: auto;
            display: flex;
            width: fit-content;
        }

        h1 {
            top: 0;
            position: sticky;
            font-size: 0.9rem;
            text-align: center;
            background-color: white;
        }

        img {
            margin: auto;
            margin-bottom: 1rem;
            display: block;
            max-width: 15ch;
        }

        section {
            width: 48ch;
            padding: 0.3rem;
            margin: 0 0.1rem;
        }

        section:nth-child(2n+1) {
            background-color: #eee;
        }

        a {
            color: steelblue;
            text-decoration: none;
        }

        details p {
            line-height: 1.3rem;
        }
'''.strip('\n')


def render_page(title: str, feeds: List[Dict]) -> None:
    '''Emit the whole HTML document for the already-parsed feeds given.'''

    print('<!DOCTYPE html>')
    print('<html lang="en">')
    print('<head>')
    print('    <meta charset="UTF-8">')
    print('    <link rel="icon" href="data:,">')
    cattr = 'content="width=device-width, initial-scale=1.0"'
    print(f'    <meta name="viewport" {cattr}>')
    if title != '':
        print(f'    <title>{escape(title)}</title>')
    print('    <style>')
    print(style)
    print('    </style>')
    print('</head>')
    print('<body>')
    print('    <main>')

    # render parsed feeds sequentially
    for feed in feeds:
        render_feed(feed)

    print('    </main>')
    print('</body>')
    print('</html>')


def main() -> None:
    '''Run the script, using the cmd-line arguments from sys.argv.'''

    # a leading help-option arg means show the help message and quit
    if len(argv) == 2 and argv[1].lower() in ('-h', '--h', '-help', '--help'):
        print(info, file=stderr)
        exit(0)

    title, args = parse_options(argv)
    # use a default web-page title if one wasn't given
    if title == '':
        title = default_title()

    try:
        stdout.reconfigure(newline='\n', encoding='utf-8')
        uris = load_feed_uris(args)
        if not uris:
            # a process pool can't be made with 0 workers
            raise ValueError('no feed URIs given')

        # significantly speed-up script by loading/parsing feeds concurrently;
        # all feeds are loaded before any output, so a failing feed doesn't
        # leave a partial web-page on standard output
        with Pool(processes=min(4, len(uris))) as pool:
            feeds = pool.map(parse_feed, uris)

        render_page(title, feeds)
    except Exception as e:
        fail(e, 1)


# the guard is required when using process pools: worker processes started
# using the `spawn` method re-import this script, and must not re-run it
if __name__ == '__main__':
    main()