#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from datetime import datetime
from html import escape
from multiprocessing import Pool
from sys import argv, exit, stderr, stdin
from typing import Dict, List, Tuple
from urllib.parse import urlparse, urlunparse
from urllib.request import urlopen
from xml.dom.minidom import parse


# info is the help message shown by the help options
info = '''
podfeed [options...] [filepaths/URIs...]


PODcast FEED fetches all episodes from the feeds given as URIs, either as
arguments, or as lines in the plain-text files given.

The result is self-contained HTML which links to all episodes, and adds
many little extras, such as tooltips showing date of publication and play
duration.

Podcast thumbnails aren't included as inline data-URIs, to avoid making
the output size considerably bigger; they could easily source external
URIs, but doing that would make the output no longer fully self-contained.

All (optional) leading options start with either single or double-dash,
and most of them change the style/color used. Some of the options are,
shown in their single-dash form:

    -h        show this help message
    -help     show this help message

    -title    use the next argument as the title in the HTML output
'''


def fail(msg, code: int = 1) -> None:
    'Show the error message given, and quit the app right away.'
    print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
    exit(code)


def parse_options(cmd_args: List[str]) -> Tuple[str, List[str]]:
    '''
    Handle the leading cmd-line options.

    The argument is the full argument vector, program name included. The
    result is the pair (title, remaining arguments), where the title is
    empty when no title option was given. A title option without a value
    after it ends the app via func fail.
    '''

    title = ''
    start = 1
    while start < len(cmd_args) and cmd_args[start].startswith('-'):
        name = cmd_args[start].lstrip('-').lower()
        # compare the whole name: a substring check would also accept `-t`,
        # and even a lone `-`, which is how standard input is asked for
        if name != 'title':
            break
        if start + 1 >= len(cmd_args):
            fail('missing actual title in cmd-line arguments', 1)
        title = cmd_args[start + 1]
        start += 2
    return title, cmd_args[start:]


def default_title(now: datetime) -> str:
    'Make the web-page title used when none is given.'
    ymd = f'{now.year}-{now.month:02}-{now.day:02}'
    hms = f'{now.hour}:{now.minute:02}:{now.second:02}'
    return f'Latest Podcast Episodes as of {ymd} {hms}'


def parse_feed(uri: str) -> Dict:
    'Turn an XML feed into dictionaries, given the feed\'s URI.'

    res = {'rss': []}
    with urlopen(uri) as inp:
        feed = parse(inp)
        for rss in feed.getElementsByTagName('rss'):
            channels = rss.getElementsByTagName('channel')
            channels = [parse_channel(chan) for chan in channels]
            res['rss'].append({'channels': channels})
    return res


def parse_channel(chan) -> Dict:
    'Help func parse_feed do its job.'

    # no channel thumbnail for now
    episodes = chan.getElementsByTagName('item')
    return {
        'title': get_str(chan, 'title'),
        'link': get_str(chan, 'link'),
        'description': get_str(chan, 'description'),
        'episodes': [parse_episode(ep) for ep in episodes],
    }


def parse_episode(episode) -> Dict:
    'Help func parse_channel do its job.'

    link = get_str(episode, 'link')
    # when enclosures are available, the last one's URL replaces the link
    for enc in episode.getElementsByTagName('enclosure'):
        link = enc.getAttribute('url')

    return {
        'title': get_str(episode, 'title'),
        'link': link,
        'description': get_str(episode, 'description'),
        'pub_date': get_str(episode, 'pubDate'),
        'duration': get_str(episode, 'itunes:duration'),
    }


def render_feed(feed) -> None:
    'Handle a single parsed RSS feed.'

    indent = 12 * ' '
    print(' <article>')

    for rss in feed['rss']:
        for chan in rss['channels']:
            href = urlunparse(urlparse(chan['link']))
            title = escape(chan['title'])
            descr = escape(chan['description'])
            a = make_anchor(href, title)
            s = f'{indent}<h1><summary title="{descr}">{a}</summary></h1>'
            print(s)
            # no channel thumbnail for now

            for episode in chan['episodes']:
                render_episode(episode)

    print(' </article>')


def render_episode(episode) -> None:
    'Help func render_feed do its job.'

    title = escape(episode['title'])
    href = urlunparse(urlparse(episode['link']))
    description = escape(episode['description'])
    pub_date = escape(episode['pub_date'])
    duration = escape(episode['duration'])
    tt = make_tooltip(pub_date, duration)
    a = make_anchor(href, title)

    print(' <section>')
    print(' <details>')
    print(f' <summary title="{tt}">{a}</summary>')
    print(f' <p>{description}</p>')
    print(' </details>')
    print(' </section>')


def make_anchor(href: str, title: str) -> str:
    '''
    Standardize how hyperlinks are handled in this script.

    The href given is the raw URI, which is escaped here, since it comes
    from the feeds and ends up inside a double-quoted attribute. The title
    is used as given, so callers are expected to escape it beforehand.
    '''
    href = escape(href, quote=True)
    return f'<a target="_blank" rel="noreferrer" href="{href}">{title}</a>'


def make_tooltip(pub_date: str, duration: str) -> str:
    '''
    Make the tooltip text for an episode.

    Durations which are whole numbers of seconds are shown as hh:mm:ss, or
    as mm:ss when under an hour; anything else is shown as given.
    '''

    try:
        total = int(duration)
    except (TypeError, ValueError):
        total = -1

    if total < 0:
        # not a plain count of seconds: feeds may give `h:mm:ss` directly
        hms = duration
    else:
        h, rest = divmod(total, 3600)
        m, s = divmod(rest, 60)
        hms = f'{h:02}:{m:02}:{s:02}' if h > 0 else f'{m:02}:{s:02}'
    return f'published: {pub_date} | duration: {hms}'


def get_str(src, tag: str) -> str:
    '''
    Simplify the control-flow of various feed-parsing funcs.

    The result is the stripped text directly inside the first element with
    the tag given, or an empty string when there's no such element, or when
    anything goes wrong.
    '''

    try:
        found = src.getElementsByTagName(tag)
        if len(found) == 0:
            return ''
        first = found[0]
        kinds = (first.TEXT_NODE, first.CDATA_SECTION_NODE)
        # join all pieces, since whitespace before a CDATA section is its
        # own text node, and would otherwise hide the actual content
        parts = [e.data for e in first.childNodes if e.nodeType in kinds]
        return ''.join(parts).strip()
    except Exception:
        # deliberate best-effort: a missing/odd field shouldn't stop a feed
        return ''


def get_uris(src) -> List[str]:
    'This func helps func load_feed_uris load all URIs from a file.'

    uris = []
    for line in src:
        line = line.strip()
        # ignore empty lines and comment lines
        if line == '' or line.startswith('#'):
            continue
        uris.append(line)
    return uris


def load_feed_uris(args: List[str]) -> List[str]:
    '''
    Turn a mix of URIs and filepaths into a list of URIs to load.

    Arguments starting with `http://` or `https://` are used directly, a
    dash stands for standard input, and anything else is the path of a
    plain-text file with a URI per line. No arguments at all means reading
    lines from standard input. Using the dash more than once raises a
    ValueError, and files which can't be opened raise an OSError.
    '''

    if args.count('-') > 1:
        msg = 'reading from `-` (standard input) more than once not allowed'
        raise ValueError(msg)

    if len(args) == 0:
        return get_uris(stdin)

    uris = []

    for path in args:
        if path.startswith(('https://', 'http://')):
            uris.append(path)
            continue

        if path == '-':
            uris.extend(get_uris(stdin))
            # avoid also trying to open a file literally named `-`
            continue

        with open(path, encoding='utf-8') as inp:
            uris.extend(get_uris(inp))

    return uris


# style is the `inner` CSS used inside the style tag
style = '''
body {
    font-size: 0.9rem;
    margin: 0 0 2rem 0;
    font-family: system-ui, -apple-system, sans-serif;
}

main {
    margin: auto;
    display: flex;
    width: fit-content;
}

h1 {
    top: 0;
    position: sticky;
    font-size: 0.9rem;
    text-align: center;
    background-color: white;
}

img {
    margin: auto;
    margin-bottom: 1rem;
    display: block;
    max-width: 15ch;
}

section {
    width: 48ch;
    padding: 0.3rem;
    margin: 0 0.1rem;
}

section:nth-child(2n+1) {
    background-color: #eee;
}

a {
    color: steelblue;
    text-decoration: none;
}

details p {
    line-height: 1.3rem;
}
'''.strip('\n')


def fetch_feeds(uris: List[str]) -> List[Dict]:
    '''
    Load and parse all the feeds given, keeping their order.

    Feeds are handled concurrently, which significantly speeds up the
    script. Any exception from func parse_feed is passed along.
    '''

    # a pool can't have 0 processes, so avoid making one when not needed
    if len(uris) == 0:
        return []

    with Pool(processes=min(4, len(uris))) as pool:
        return pool.map(parse_feed, uris)


def render_page(title: str, feeds: List[Dict]) -> None:
    'Emit the whole HTML page for the parsed feeds given.'

    print('<!DOCTYPE html>')
    print('<html lang="en">')
    print('<head>')
    print(' <meta charset="UTF-8">')
    print(' <link rel="icon" href="data:,">')
    cattr = 'content="width=device-width, initial-scale=1.0"'
    print(f' <meta name="viewport" {cattr}>')
    if title != '':
        print(f' <title>{escape(title)}</title>')
    print(' <style>')
    print(style)
    print(' </style>')
    print('</head>')
    print('<body>')
    print(' <main>')

    for feed in feeds:
        render_feed(feed)

    print(' </main>')
    print('</body>')
    print('</html>')


def main() -> None:
    'Run the app, using the cmd-line arguments.'

    # a leading help-option arg means show the help message and quit
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    title, args = parse_options(argv)

    # use a default web-page title if one wasn't given
    if title == '':
        title = default_title(datetime.now())

    try:
        # load everything before emitting anything, so failures don't leave
        # a partial web-page on standard output
        feeds = fetch_feeds(load_feed_uris(args))
        render_page(title, feeds)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        fail(e, 1)


# the guard is needed by multiprocessing, as worker processes may re-import
# this script, and must not start running the whole app all over again
if __name__ == '__main__':
    main()