File: podfeed.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath podfeed.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "bytes" 37 "encoding/base64" 38 "encoding/xml" 39 "errors" 40 "flag" 41 "fmt" 42 "html/template" 43 "io" 44 "math" 45 "net/http" 46 "os" 47 "regexp" 48 "runtime" 49 "strconv" 50 "strings" 51 "sync" 52 "time" 53 ) 54 55 const src = ` 56 <!DOCTYPE html> 57 <html lang="en"> 58 59 <head> 60 <meta charset="UTF-8"> 61 <meta name="viewport" content="width=device-width, initial-scale=1.0"> 62 <link rel="icon" href="data:,"> 63 <title>{{ .Title }}</title> 64 <style> 65 body { 66 font-size: 0.9rem; 67 margin: 0 0 2rem 0; 68 font-family: system-ui, -apple-system, sans-serif; 69 } 70 71 main { 72 margin: auto; 73 display: flex; 74 width: fit-content; 75 } 76 77 h1 { 78 top: 0; 79 position: sticky; 80 font-size: 0.9rem; 81 text-align: center; 82 background-color: white; 83 } 84 85 img { 86 margin: auto; 87 margin-bottom: 1rem; 88 display: block; 89 max-width: 15ch; 90 } 91 92 section { 93 width: 48ch; 94 padding: 0.3rem; 95 margin: 0 0.1rem; 96 } 97 98 section:nth-child(2n+1) { 99 background-color: #eee; 100 } 101 102 a { 103 color: steelblue; 104 text-decoration: none; 105 } 106 107 details p { 108 line-height: 1.3rem; 109 } 110 111 </style> 112 </head> 113 114 <body> 115 <main> 116 {{- range .Feeds }} 117 <article> 118 <h1><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></h1> 119 <img src="{{ .ImageLink | url }}"> 120 {{- range .Items }} 121 <section> 122 <details> 123 <summary title="{{ .Tooltip }}"><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></summary> 124 <p>{{ .Description }}</p> 125 </details> 126 </section> 127 {{- end }} 128 </article> 129 {{- end }} 130 </main> 131 </body> 132 133 </html> 134 ` 135 136 const usage = ` 137 podfeed [URIs/filenames...] 138 139 Keep track of what's on multiple podcasts/RSS feeds with auto-popup links and 140 collapsible descriptions. 141 142 After fetching all RSS feeds, this program emits script-free HTML code for a 143 standalone webpage with links to all feed items, each having expandable 144 descriptions. 145 146 The cmd-line arguments can be a mix of direct URIs to podcast/RSS feeds and 147 filenames: in any files given, each line is taken as a URI to check, unless 148 the line is empty or starts with #, which marks it as a comment line. 149 ` 150 151 // enable inlining/embedding thumbnails into page 152 var funcs = template.FuncMap{ 153 `url`: func(s string) template.URL { 154 return template.URL(s) 155 }, 156 } 157 158 var pageTemplate = template.Must(template.New(`main`).Funcs(funcs).Parse(src[1:])) 159 160 // result is the payload given to the page template 161 type result struct { 162 Title string 163 Feeds []feed 164 } 165 166 func main() { 167 cfg := parseFlags(usage[1:]) 168 169 // fetch feeds concurrently 170 res := fetch(cfg) 171 172 // show which podcasts/feeds caused problems, and keep only the ones 173 // which were loaded successfully 174 page := result{Title: cfg.Title} 175 for _, v := range res { 176 if v.Problem != nil { 177 fmt.Fprintln(os.Stderr, v.Problem.Error()) 178 continue 179 } 180 181 // limit feed's item-length, unless length-limiting was disabled via 182 // a negative value 183 if cfg.ItemLimit >= 0 && len(v.Feed.Items) > cfg.ItemLimit { 184 v.Feed.Items = v.Feed.Items[:cfg.ItemLimit] 185 } 186 page.Feeds = append(page.Feeds, v.Feed) 187 } 188 189 // render HTML result to standard output 190 w := bufio.NewWriter(os.Stdout) 191 defer w.Flush() 192 pageTemplate.Execute(w, page) 193 } 194 195 const ( 196 titleUsage = `title for the HTML result` 197 itemLimitUsage = `max items shown per feed, starting from latest; negative to disable` 198 thumbnailsUsage = `show channel/podcast thumbnails` 199 inlineUsage = `inline/embed thumbnails as base64 data` 200 ) 201 202 // config has all the cmd-line options: each has its own default value, but 203 // can be explicitly set via one of the cmd-line flags 204 type config struct { 205 Feeds []string 206 Title string 207 ItemLimit int 208 Thumbnails bool 209 Inline bool 210 } 211 212 func parseFlags(usage string) config { 213 cfg := config{ 214 Title: `Latest Podcast Episodes`, 215 ItemLimit: -1, 216 Thumbnails: true, 217 Inline: true, 218 } 219 220 flag.Usage = func() { 221 fmt.Fprintf(flag.CommandLine.Output(), "%s\n\nOptions\n\n", usage) 222 flag.PrintDefaults() 223 } 224 flag.StringVar(&cfg.Title, `title`, cfg.Title, titleUsage) 225 flag.IntVar(&cfg.ItemLimit, `max`, cfg.ItemLimit, itemLimitUsage) 226 flag.BoolVar(&cfg.Thumbnails, `thumbs`, cfg.Thumbnails, thumbnailsUsage) 227 flag.BoolVar(&cfg.Inline, `inline`, cfg.Inline, inlineUsage) 228 flag.Parse() 229 230 for _, a := range flag.Args() { 231 if strings.HasPrefix(a, `https://`) || strings.HasPrefix(a, `http://`) { 232 // it's a URI feed 233 cfg.Feeds = append(cfg.Feeds, a) 234 continue 235 } 236 237 // it's a text file with feed URIs, one per line 238 lines, err := slurpFileLines(a) 239 if err != nil { 240 fmt.Fprintln(os.Stderr, err.Error()) 241 continue 242 } 243 cfg.Feeds = append(cfg.Feeds, lines...) 244 } 245 246 // if not given any filenames/URIs, read URIs from stdin 247 if flag.NArg() == 0 { 248 lines, err := slurpLines(os.Stdin) 249 if err == nil { 250 cfg.Feeds = append(cfg.Feeds, lines...) 251 } else { 252 fmt.Fprintln(os.Stderr, err.Error()) 253 } 254 } 255 256 return cfg 257 } 258 259 func slurpFileLines(fname string) ([]string, error) { 260 f, err := os.Open(fname) 261 if err != nil { 262 return nil, err 263 } 264 defer f.Close() 265 return slurpLines(f) 266 } 267 268 func slurpLines(r io.Reader) ([]string, error) { 269 var lines []string 270 const maxbufsize = 8 * 1024 * 1024 * 1024 271 sc := bufio.NewScanner(r) 272 sc.Buffer(nil, maxbufsize) 273 274 for sc.Scan() { 275 err := sc.Err() 276 if err != nil { 277 return lines, err 278 } 279 280 s := strings.TrimSpace(sc.Text()) 281 // ignore empty lines and comment lines 282 if s == `` || strings.HasPrefix(s, `#`) { 283 continue 284 } 285 286 lines = append(lines, s) 287 } 288 289 return lines, nil 290 } 291 292 // note: the tag matcher can't rid anchor tags of inner tags in its content 293 const tagRE = `</?[a-z][a-z1-6]*( +[a-z]+ *= *"[a-z A-Z0-9-]*")*( /)?>` 294 295 // regex to match opening/closing HTML tags, used in function `clean`; the 296 // first letter explicitly excludes a, to avoid matching/replacing anchor tags 297 var tagMatcher = regexp.MustCompile(tagRE) 298 299 // regex to match ampersand escapes, used in function `clean` 300 var ampersandMatcher = regexp.MustCompile(`&[a-zA-Z]+;`) 301 302 var ampersandEscapes = map[string]string{ 303 ` `: ` `, 304 `&`: `&`, 305 `<`: `<`, 306 `>`: `>`, 307 } 308 309 // clean improves the content of descriptions, by removing typical markup 310 // junk often found in RSS feeds 311 func clean(s string) string { 312 s = tagMatcher.ReplaceAllStringFunc(s, func(s string) string { 313 if strings.HasPrefix(s, `<a `) { 314 return s 315 } 316 317 switch s { 318 case `</a>`: 319 return `</a>` 320 case `<br/>`, `<br />`: 321 return "\n" 322 default: 323 return `` 324 } 325 }) 326 327 s = ampersandMatcher.ReplaceAllStringFunc(s, func(s string) string { 328 sub, ok := ampersandEscapes[s] 329 if ok { 330 return sub 331 } 332 return s 333 }) 334 335 return s 336 } 337 338 // makeDataURI encodes the bytes given into a MIME-typed base64-encoded URI 339 func makeDataURI(b []byte, mime string) (string, error) { 340 var buf strings.Builder 341 base64len := int(math.Ceil(4 * float64(len(b)) / 3)) 342 buf.Grow(len(`data:`) + len(mime) + len(`;base64,`) + base64len) 343 fmt.Fprintf(&buf, `data:%s;base64,`, mime) 344 345 enc := base64.NewEncoder(base64.StdEncoding, &buf) 346 defer enc.Close() 347 348 _, err := enc.Write(b) 349 if err != nil { 350 return ``, err 351 } 352 return buf.String(), nil 353 } 354 355 // notEmptyOr simplifies control flow around this app 356 func notEmptyOr(s, fallback string) string { 357 if len(s) > 0 { 358 return s 359 } 360 return fallback 361 } 362 363 // parseFeed takes raw RSS-string bytes and makes a feed object out of them 364 func parseFeed(b []byte) (feed, error) { 365 atom, err := parseAtom(b) 366 if err != io.EOF && err != nil { 367 return feed{}, err 368 } 369 370 if len(atom.Channels) == 0 { 371 return feed{}, errors.New(`feed has no channels`) 372 } 373 if len(atom.Channels) > 1 { 374 const msg = `multiple channels in a single feed aren't supported` 375 return feed{}, errors.New(msg) 376 } 377 378 var feed feed 379 ch := atom.Channels[0] 380 feed.Title = ch.Title 381 feed.Link = strings.Replace(ch.Link, `http://`, `https://`, 1) 382 feed.ImageLink = ch.Image.URL 383 feed.Description = clean(ch.Description) 384 385 for _, v := range ch.Items { 386 if len(v.Enclosures) == 0 { 387 continue 388 } 389 feed.Items = append(feed.Items, adaptItem(v)) 390 } 391 return feed, nil 392 } 393 394 // feed is a template-friendly representation of a parsed podcast feed 395 type feed struct { 396 Title string 397 Link string 398 ImageLink string 399 Description string 400 401 Items []item 402 } 403 404 // item is a template-friendly representation of a podcast episode 405 type item struct { 406 Title string 407 Link string 408 Tooltip string 409 Description string 410 } 411 412 // adaptItem makes a podcast episodes's info more template-friendly 413 func adaptItem(v atomItem) item { 414 tooltip := `` 415 duration := v.Duration 416 // if duration is in seconds, turn it into the hh:mm:ss format 417 if !strings.Contains(duration, `:`) { 418 n, err := strconv.Atoi(duration) 419 if err == nil && n > 0 { 420 duration = (time.Duration(n) * time.Second).String() 421 } 422 } 423 424 if duration != `` && v.PublicationDate != `` { 425 const fs = `published: %s | duration: %s` 426 tooltip = fmt.Sprintf(fs, v.PublicationDate, duration) 427 } 428 if duration == `` && v.PublicationDate != `` { 429 tooltip = fmt.Sprintf(`published: %s`, v.PublicationDate) 430 } 431 if duration != `` && v.PublicationDate == `` { 432 tooltip = fmt.Sprintf(`duration: %s`, v.PublicationDate) 433 } 434 435 enc := v.Enclosures[0] 436 return item{ 437 Title: v.Title, 438 Link: notEmptyOr(enc.URL, enc.AttrURL), 439 Tooltip: tooltip, 440 Description: clean(v.Description), 441 } 442 } 443 444 // atomFeed is an atom-format XML/RSS document: all its useful info is in the 445 // Channels field, which is an array with usually only 1 item 446 type atomFeed struct { 447 Atom string `xml:"atom"` 448 CC string `xml:"cc"` 449 Channels []atomChannel `xml:"channel"` 450 Content string `xml:"content"` 451 Media string `xml:"media"` 452 Version int `xml:"version"` 453 } 454 455 // atomChannel has all the channel tags in an atom-format document: its most 456 // useful info is in its Items array field 457 type atomChannel struct { 458 Author string `xml:"author"` 459 Description string `xml:"description"` 460 Docs string `xml:"docs"` 461 Explicit string `xml:"explicit"` 462 Image atomImage `xml:"image"` 463 Items []atomItem `xml:"item"` 464 Language string `xml:"language"` 465 Link string `xml:"link"` 466 PublicationDate string `xml:"pubDate"` 467 Summary string `xml:"summary"` 468 Title string `xml:"title"` 469 Subtitle string `xml:"subtitle"` 470 471 // Copyright string `xml:"copyright"` 472 // Generator string `xml:"generator"` 473 // Categories []string 474 // Image []string 475 // Owner []string 476 // ManagingEditor string 477 // LastBuildDate string 478 // Type string 479 } 480 481 // atomImage is a channel's thumbnail image/logo 482 type atomImage struct { 483 Title string `xml:"title"` 484 URL string `xml:"url"` 485 } 486 487 // atomItem is a link to a podcast episode or to an article 488 type atomItem struct { 489 Author string `xml:"author"` 490 Description string `xml:"description"` 491 Duration string `xml:"duration"` // media-duration as hh:mm:ss 492 493 Enclosures []atomEnclosure `xml:"enclosure"` 494 495 Episode int `xml:"episode"` 496 Explicit string `xml:"explicit"` 497 PublicationDate string `xml:"pubDate"` 498 Summary string `xml:"summary"` 499 Title string `xml:"title"` 500 501 // Keywords []string // not sure these array items are strings 502 } 503 504 // atomEnclosure is an item's link, along with some useful metadata 505 type atomEnclosure struct { 506 Length int `xml:"length"` // seems to be the media filesize 507 Type string `xml:"type"` // MIME type for the media file 508 URL string `xml:"url"` // the URL for the media file 509 510 // special iTunes attributes 511 AttrLength int `xml:"length,attr"` 512 AttrType string `xml:"type,attr"` 513 AttrURL string `xml:"url,attr"` 514 AttrDuration string `xml:"duration,attr"` 515 } 516 517 // parseAtom decodes podcast/feed info from the bytes given 518 func parseAtom(b []byte) (atomFeed, error) { 519 var wrap atomFeed 520 if !bytes.Contains(b, []byte(`itunes:`)) { 521 err := xml.Unmarshal(b, &wrap) 522 return wrap, err 523 } 524 525 b = bytes.ReplaceAll(b, []byte(`<itunes:`), []byte{'<'}) 526 b = bytes.ReplaceAll(b, []byte(`</itunes:`), []byte{'<', '/'}) 527 err := xml.Unmarshal(b, &wrap) 528 return wrap, err 529 } 530 531 // Result is the payload/error combo resulting from trying to fetch a feed. 532 type Result struct { 533 Index int 534 URI string 535 536 Feed feed 537 Problem error 538 } 539 540 // fetch tries to fetch all podcast feeds concurrently, to save time 541 func fetch(cfg config) []Result { 542 var wg sync.WaitGroup 543 wg.Add(len(cfg.Feeds)) 544 545 // start rate-limiter up to the # of CPUs 546 tickets := make(chan int, runtime.NumCPU()) 547 go func() { 548 for i := range cfg.Feeds { 549 tickets <- i 550 } 551 552 // wait until fetcher loop below has finished dispatching all tasks 553 wg.Wait() 554 close(tickets) // quit the fetcher loop 555 }() 556 557 // setup parameters and final results array 558 res := make([]Result, len(cfg.Feeds)) 559 for i, uri := range cfg.Feeds { 560 res[i] = Result{Index: i, URI: uri, Feed: feed{}, Problem: nil} 561 } 562 563 // concurrently fetch feeds 564 for i := range tickets { 565 go fetchItem(&res[i], &wg, cfg) 566 } 567 return res 568 } 569 570 // fetchItem is concurrently called/dispatched to try to fetch and decode a 571 // single podcast feed: any error along the way is remembered as part of the 572 // result, so the user can later be told about it 573 func fetchItem(r *Result, wg *sync.WaitGroup, cfg config) { 574 defer wg.Done() 575 576 // read RSS feed 577 b, err := slurp(r.URI) 578 if err != nil { 579 r.Problem = err 580 return 581 } 582 583 // extract most important RSS info 584 f, err := parseFeed(b) 585 if err != nil { 586 r.Problem = err 587 return 588 } 589 // r.Feed = newFeed(f) 590 r.Feed = f 591 592 if !cfg.Thumbnails { 593 // to hide thumbnails, use a no-data URI 594 r.Feed.ImageLink = `data,` 595 return 596 } 597 598 if !cfg.Inline { 599 // if asked to, keep images as externally-linked resources 600 return 601 } 602 603 // read image thumbnail 604 b, err = slurp(f.ImageLink) 605 if err != nil { 606 r.Problem = err 607 return 608 } 609 610 mime := `image/jpeg` 611 if strings.Contains(f.ImageLink, `.png`) { 612 mime = `image/png` 613 } 614 615 // data-URI-encode thumbnail, so it's part of the resulting webpage 616 s, err := makeDataURI(b, mime) 617 if err != nil { 618 r.Problem = err 619 return 620 } 621 r.Feed.ImageLink = s 622 } 623 624 func slurp(uri string) ([]byte, error) { 625 resp, err := http.Get(uri) 626 if err != nil { 627 return nil, err 628 } 629 defer resp.Body.Close() 630 return io.ReadAll(resp.Body) 631 }