File: podfeed.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath podfeed.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "encoding/base64"
  38     "encoding/xml"
  39     "errors"
  40     "flag"
  41     "fmt"
  42     "html/template"
  43     "io"
  44     "math"
  45     "net/http"
  46     "os"
  47     "regexp"
  48     "runtime"
  49     "strconv"
  50     "strings"
  51     "sync"
  52     "time"
  53 )
  54 
// src is the HTML source for the page template: its payload needs a Title
// and a list of Feeds, where each feed has a Title, a Link, an ImageLink,
// and a list of Items, each of these having a Title, a Link, a Tooltip, and
// a Description; the leading newline is skipped when the template is parsed
const src = `
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="icon" href="data:,">
    <title>{{ .Title }}</title>
    <style>
        body {
            font-size: 0.9rem;
            margin: 0 0 2rem 0;
            font-family: system-ui, -apple-system, sans-serif;
        }

        main {
            margin: auto;
            display: flex;
            width: fit-content;
        }

        h1 {
            top: 0;
            position: sticky;
            font-size: 0.9rem;
            text-align: center;
            background-color: white;
        }

        img {
            margin: auto;
            margin-bottom: 1rem;
            display: block;
            max-width: 15ch;
        }

        section {
            width: 48ch;
            padding: 0.3rem;
            margin: 0 0.1rem;
        }

        section:nth-child(2n+1) {
            background-color: #eee;
        }

        a {
            color: steelblue;
            text-decoration: none;
        }

        details p {
            line-height: 1.3rem;
        }

    </style>
</head>

<body>
    <main>
        {{- range .Feeds }}
        <article>
            <h1><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></h1>
            <img src="{{ .ImageLink | url }}">
            {{- range .Items }}
            <section>
                <details>
                    <summary title="{{ .Tooltip }}"><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></summary>
                    <p>{{ .Description }}</p>
                </details>
            </section>
            {{- end }}
        </article>
        {{- end }}
    </main>
</body>

</html>
`
 135 
// usage is the help message shown before the list of cmd-line options; its
// leading newline is skipped by main, before handing it to parseFlags
const usage = `
podfeed [URIs/filenames...]

Keep track of what's on multiple podcasts/RSS feeds with auto-popup links and
collapsible descriptions.

After fetching all RSS feeds, this program emits script-free HTML code for a
standalone webpage with links to all feed items, each having expandable
descriptions.

The cmd-line arguments can be a mix of direct URIs to podcast/RSS feeds and
filenames: in any files given, each line is taken as a URI to check, unless
the line is empty or starts with #, which marks it as a comment line.
`
 150 
// funcs has the custom functions available to the page template: `url`
// converts a string into a template.URL, which html/template trusts and
// emits without its usual URL filtering; this is what enables
// inlining/embedding thumbnails into the page as data URIs
//
// NOTE(review): when thumbnails aren't inlined, the string given to `url`
// is the image link as found in the feed — confirm trusting it is intended
var funcs = template.FuncMap{
    `url`: func(s string) template.URL {
        return template.URL(s)
    },
}
 157 
// pageTemplate is the page template, parsed once at startup from src, minus
// its leading newline; template.Must panics if the source can't be parsed
var pageTemplate = template.Must(template.New(`main`).Funcs(funcs).Parse(src[1:]))
 159 
// result is the payload given to the page template
type result struct {
    Title string // title of the whole webpage
    Feeds []feed // feeds to show, each with its own items
}
 165 
 166 func main() {
 167     cfg := parseFlags(usage[1:])
 168 
 169     // fetch feeds concurrently
 170     res := fetch(cfg)
 171 
 172     // show which podcasts/feeds caused problems, and keep only the ones
 173     // which were loaded successfully
 174     page := result{Title: cfg.Title}
 175     for _, v := range res {
 176         if v.Problem != nil {
 177             fmt.Fprintln(os.Stderr, v.Problem.Error())
 178             continue
 179         }
 180 
 181         // limit feed's item-length, unless length-limiting was disabled via
 182         // a negative value
 183         if cfg.ItemLimit >= 0 && len(v.Feed.Items) > cfg.ItemLimit {
 184             v.Feed.Items = v.Feed.Items[:cfg.ItemLimit]
 185         }
 186         page.Feeds = append(page.Feeds, v.Feed)
 187     }
 188 
 189     // render HTML result to standard output
 190     w := bufio.NewWriter(os.Stdout)
 191     defer w.Flush()
 192     pageTemplate.Execute(w, page)
 193 }
 194 
// help messages for the cmd-line flags registered in parseFlags
const (
    titleUsage      = `title for the HTML result`
    itemLimitUsage  = `max items shown per feed, starting from latest; negative to disable`
    thumbnailsUsage = `show channel/podcast thumbnails`
    inlineUsage     = `inline/embed thumbnails as base64 data`
)
 201 
// config has all the cmd-line options: each has its own default value, but
// can be explicitly set via one of the cmd-line flags; the exception is
// Feeds, which is gathered from the non-flag arguments, or from stdin
type config struct {
    Feeds      []string // URIs of all the feeds to fetch
    Title      string   // title for the HTML result
    ItemLimit  int      // max items shown per feed; negative disables the limit
    Thumbnails bool     // whether to show channel/podcast thumbnails
    Inline     bool     // whether to embed thumbnails as base64 data
}
 211 
 212 func parseFlags(usage string) config {
 213     cfg := config{
 214         Title:      `Latest Podcast Episodes`,
 215         ItemLimit:  -1,
 216         Thumbnails: true,
 217         Inline:     true,
 218     }
 219 
 220     flag.Usage = func() {
 221         fmt.Fprintf(flag.CommandLine.Output(), "%s\n\nOptions\n\n", usage)
 222         flag.PrintDefaults()
 223     }
 224     flag.StringVar(&cfg.Title, `title`, cfg.Title, titleUsage)
 225     flag.IntVar(&cfg.ItemLimit, `max`, cfg.ItemLimit, itemLimitUsage)
 226     flag.BoolVar(&cfg.Thumbnails, `thumbs`, cfg.Thumbnails, thumbnailsUsage)
 227     flag.BoolVar(&cfg.Inline, `inline`, cfg.Inline, inlineUsage)
 228     flag.Parse()
 229 
 230     for _, a := range flag.Args() {
 231         if strings.HasPrefix(a, `https://`) || strings.HasPrefix(a, `http://`) {
 232             // it's a URI feed
 233             cfg.Feeds = append(cfg.Feeds, a)
 234             continue
 235         }
 236 
 237         // it's a text file with feed URIs, one per line
 238         lines, err := slurpFileLines(a)
 239         if err != nil {
 240             fmt.Fprintln(os.Stderr, err.Error())
 241             continue
 242         }
 243         cfg.Feeds = append(cfg.Feeds, lines...)
 244     }
 245 
 246     // if not given any filenames/URIs, read URIs from stdin
 247     if flag.NArg() == 0 {
 248         lines, err := slurpLines(os.Stdin)
 249         if err == nil {
 250             cfg.Feeds = append(cfg.Feeds, lines...)
 251         } else {
 252             fmt.Fprintln(os.Stderr, err.Error())
 253         }
 254     }
 255 
 256     return cfg
 257 }
 258 
 259 func slurpFileLines(fname string) ([]string, error) {
 260     f, err := os.Open(fname)
 261     if err != nil {
 262         return nil, err
 263     }
 264     defer f.Close()
 265     return slurpLines(f)
 266 }
 267 
 268 func slurpLines(r io.Reader) ([]string, error) {
 269     var lines []string
 270     const maxbufsize = 8 * 1024 * 1024 * 1024
 271     sc := bufio.NewScanner(r)
 272     sc.Buffer(nil, maxbufsize)
 273 
 274     for sc.Scan() {
 275         err := sc.Err()
 276         if err != nil {
 277             return lines, err
 278         }
 279 
 280         s := strings.TrimSpace(sc.Text())
 281         // ignore empty lines and comment lines
 282         if s == `` || strings.HasPrefix(s, `#`) {
 283             continue
 284         }
 285 
 286         lines = append(lines, s)
 287     }
 288 
 289     return lines, nil
 290 }
 291 
 292 // note: the tag matcher can't rid anchor tags of inner tags in its content
 293 const tagRE = `</?[a-z][a-z1-6]*( +[a-z]+ *= *"[a-z A-Z0-9-]*")*( /)?>`
 294 
 295 // regex to match opening/closing HTML tags, used in function `clean`; the
 296 // first letter explicitly excludes a, to avoid matching/replacing anchor tags
 297 var tagMatcher = regexp.MustCompile(tagRE)
 298 
 299 // regex to match ampersand escapes, used in function `clean`
 300 var ampersandMatcher = regexp.MustCompile(`&[a-zA-Z]+;`)
 301 
 302 var ampersandEscapes = map[string]string{
 303     `&nbsp`: ` `,
 304     `&amp`:  `&`,
 305     `&lt`:   `<`,
 306     `&gt`:   `>`,
 307 }
 308 
 309 // clean improves the content of descriptions, by removing typical markup
 310 // junk often found in RSS feeds
 311 func clean(s string) string {
 312     s = tagMatcher.ReplaceAllStringFunc(s, func(s string) string {
 313         if strings.HasPrefix(s, `<a `) {
 314             return s
 315         }
 316 
 317         switch s {
 318         case `</a>`:
 319             return `</a>`
 320         case `<br/>`, `<br />`:
 321             return "\n"
 322         default:
 323             return ``
 324         }
 325     })
 326 
 327     s = ampersandMatcher.ReplaceAllStringFunc(s, func(s string) string {
 328         sub, ok := ampersandEscapes[s]
 329         if ok {
 330             return sub
 331         }
 332         return s
 333     })
 334 
 335     return s
 336 }
 337 
 338 // makeDataURI encodes the bytes given into a MIME-typed base64-encoded URI
 339 func makeDataURI(b []byte, mime string) (string, error) {
 340     var buf strings.Builder
 341     base64len := int(math.Ceil(4 * float64(len(b)) / 3))
 342     buf.Grow(len(`data:`) + len(mime) + len(`;base64,`) + base64len)
 343     fmt.Fprintf(&buf, `data:%s;base64,`, mime)
 344 
 345     enc := base64.NewEncoder(base64.StdEncoding, &buf)
 346     defer enc.Close()
 347 
 348     _, err := enc.Write(b)
 349     if err != nil {
 350         return ``, err
 351     }
 352     return buf.String(), nil
 353 }
 354 
 355 // notEmptyOr simplifies control flow around this app
 356 func notEmptyOr(s, fallback string) string {
 357     if len(s) > 0 {
 358         return s
 359     }
 360     return fallback
 361 }
 362 
 363 // parseFeed takes raw RSS-string bytes and makes a feed object out of them
 364 func parseFeed(b []byte) (feed, error) {
 365     atom, err := parseAtom(b)
 366     if err != io.EOF && err != nil {
 367         return feed{}, err
 368     }
 369 
 370     if len(atom.Channels) == 0 {
 371         return feed{}, errors.New(`feed has no channels`)
 372     }
 373     if len(atom.Channels) > 1 {
 374         const msg = `multiple channels in a single feed aren't supported`
 375         return feed{}, errors.New(msg)
 376     }
 377 
 378     var feed feed
 379     ch := atom.Channels[0]
 380     feed.Title = ch.Title
 381     feed.Link = strings.Replace(ch.Link, `http://`, `https://`, 1)
 382     feed.ImageLink = ch.Image.URL
 383     feed.Description = clean(ch.Description)
 384 
 385     for _, v := range ch.Items {
 386         if len(v.Enclosures) == 0 {
 387             continue
 388         }
 389         feed.Items = append(feed.Items, adaptItem(v))
 390     }
 391     return feed, nil
 392 }
 393 
// feed is a template-friendly representation of a parsed podcast feed
type feed struct {
    Title       string // channel title, used as the text of the feed's heading
    Link        string // channel link, which the feed's heading points to
    ImageLink   string // source for the thumbnail: either a link or a data URI
    Description string // cleaned channel description; unused by the template

    Items []item // the feed's episodes, in the order they were parsed
}
 403 
// item is a template-friendly representation of a podcast episode
type item struct {
    Title       string // episode title, used as the text of its link
    Link        string // link to the episode, from its first enclosure
    Tooltip     string // publication/duration info for the title attribute
    Description string // cleaned episode description, initially collapsed
}
 411 
 412 // adaptItem makes a podcast episodes's info more template-friendly
 413 func adaptItem(v atomItem) item {
 414     tooltip := ``
 415     duration := v.Duration
 416     // if duration is in seconds, turn it into the hh:mm:ss format
 417     if !strings.Contains(duration, `:`) {
 418         n, err := strconv.Atoi(duration)
 419         if err == nil && n > 0 {
 420             duration = (time.Duration(n) * time.Second).String()
 421         }
 422     }
 423 
 424     if duration != `` && v.PublicationDate != `` {
 425         const fs = `published: %s | duration: %s`
 426         tooltip = fmt.Sprintf(fs, v.PublicationDate, duration)
 427     }
 428     if duration == `` && v.PublicationDate != `` {
 429         tooltip = fmt.Sprintf(`published: %s`, v.PublicationDate)
 430     }
 431     if duration != `` && v.PublicationDate == `` {
 432         tooltip = fmt.Sprintf(`duration: %s`, v.PublicationDate)
 433     }
 434 
 435     enc := v.Enclosures[0]
 436     return item{
 437         Title:       v.Title,
 438         Link:        notEmptyOr(enc.URL, enc.AttrURL),
 439         Tooltip:     tooltip,
 440         Description: clean(v.Description),
 441     }
 442 }
 443 
// atomFeed is an atom-format XML/RSS document: all its useful info is in the
// Channels field, which is an array with usually only 1 item; all fields are
// decoded from child elements with the names in their tags, and parseFeed
// only uses Channels
//
// NOTE(review): despite the name, the channel/item layout decoded here looks
// like RSS 2.0, rather than the Atom format — confirm before renaming
type atomFeed struct {
    Atom     string        `xml:"atom"`
    CC       string        `xml:"cc"`
    Channels []atomChannel `xml:"channel"`
    Content  string        `xml:"content"`
    Media    string        `xml:"media"`
    Version  int           `xml:"version"`
}
 454 
// atomChannel has all the channel tags in an atom-format document: its most
// useful info is in its Items array field; besides that, parseFeed only
// uses the Title, Link, Image, and Description fields
type atomChannel struct {
    Author          string     `xml:"author"`
    Description     string     `xml:"description"`
    Docs            string     `xml:"docs"`
    Explicit        string     `xml:"explicit"`
    Image           atomImage  `xml:"image"`
    Items           []atomItem `xml:"item"`
    Language        string     `xml:"language"`
    Link            string     `xml:"link"`
    PublicationDate string     `xml:"pubDate"`
    Summary         string     `xml:"summary"`
    Title           string     `xml:"title"`
    Subtitle        string     `xml:"subtitle"`

    // fields not decoded for now
    // Copyright string `xml:"copyright"`
    // Generator string `xml:"generator"`
    // Categories     []string
    // Image          []string
    // Owner          []string
    // ManagingEditor string
    // LastBuildDate  string
    // Type           string
}
 480 
// atomImage is a channel's thumbnail image/logo: parseFeed only uses its
// URL, which becomes the feed's image link
type atomImage struct {
    Title string `xml:"title"`
    URL   string `xml:"url"`
}
 486 
// atomItem is a link to a podcast episode or to an article; adaptItem only
// uses its Title, Description, Duration, PublicationDate, and Enclosures
type atomItem struct {
    Author      string `xml:"author"`
    Description string `xml:"description"`
    Duration    string `xml:"duration"` // media-duration as hh:mm:ss, or as a number of seconds

    // Enclosures are the item's media links: only the first one is used
    Enclosures []atomEnclosure `xml:"enclosure"`

    Episode         int    `xml:"episode"`
    Explicit        string `xml:"explicit"`
    PublicationDate string `xml:"pubDate"`
    Summary         string `xml:"summary"`
    Title           string `xml:"title"`

    // Keywords []string // not sure these array items are strings
}
 503 
// atomEnclosure is an item's link, along with some useful metadata: each
// value can come either from a child element or from an attribute with the
// same name, so there are fields for both; adaptItem prefers URL to AttrURL
type atomEnclosure struct {
    Length int    `xml:"length"` // seems to be the media filesize
    Type   string `xml:"type"`   // MIME type for the media file
    URL    string `xml:"url"`    // the URL for the media file

    // special iTunes attributes
    // NOTE(review): these look like the usual attributes of RSS enclosure
    // tags, rather than something iTunes-specific — confirm
    AttrLength   int    `xml:"length,attr"`
    AttrType     string `xml:"type,attr"`
    AttrURL      string `xml:"url,attr"`
    AttrDuration string `xml:"duration,attr"`
}
 516 
 517 // parseAtom decodes podcast/feed info from the bytes given
 518 func parseAtom(b []byte) (atomFeed, error) {
 519     var wrap atomFeed
 520     if !bytes.Contains(b, []byte(`itunes:`)) {
 521         err := xml.Unmarshal(b, &wrap)
 522         return wrap, err
 523     }
 524 
 525     b = bytes.ReplaceAll(b, []byte(`<itunes:`), []byte{'<'})
 526     b = bytes.ReplaceAll(b, []byte(`</itunes:`), []byte{'<', '/'})
 527     err := xml.Unmarshal(b, &wrap)
 528     return wrap, err
 529 }
 530 
// Result is the payload/error combo resulting from trying to fetch a feed.
type Result struct {
    Index int    // position of the feed among all the feeds to fetch
    URI   string // where to fetch the feed from

    Feed    feed  // the feed fetched: main ignores it when Problem is set
    Problem error // what went wrong when fetching/decoding the feed, if anything
}
 539 
 540 // fetch tries to fetch all podcast feeds concurrently, to save time
 541 func fetch(cfg config) []Result {
 542     var wg sync.WaitGroup
 543     wg.Add(len(cfg.Feeds))
 544 
 545     // start rate-limiter up to the # of CPUs
 546     tickets := make(chan int, runtime.NumCPU())
 547     go func() {
 548         for i := range cfg.Feeds {
 549             tickets <- i
 550         }
 551 
 552         // wait until fetcher loop below has finished dispatching all tasks
 553         wg.Wait()
 554         close(tickets) // quit the fetcher loop
 555     }()
 556 
 557     // setup parameters and final results array
 558     res := make([]Result, len(cfg.Feeds))
 559     for i, uri := range cfg.Feeds {
 560         res[i] = Result{Index: i, URI: uri, Feed: feed{}, Problem: nil}
 561     }
 562 
 563     // concurrently fetch feeds
 564     for i := range tickets {
 565         go fetchItem(&res[i], &wg, cfg)
 566     }
 567     return res
 568 }
 569 
 570 // fetchItem is concurrently called/dispatched to try to fetch and decode a
 571 // single podcast feed: any error along the way is remembered as part of the
 572 // result, so the user can later be told about it
 573 func fetchItem(r *Result, wg *sync.WaitGroup, cfg config) {
 574     defer wg.Done()
 575 
 576     // read RSS feed
 577     b, err := slurp(r.URI)
 578     if err != nil {
 579         r.Problem = err
 580         return
 581     }
 582 
 583     // extract most important RSS info
 584     f, err := parseFeed(b)
 585     if err != nil {
 586         r.Problem = err
 587         return
 588     }
 589     // r.Feed = newFeed(f)
 590     r.Feed = f
 591 
 592     if !cfg.Thumbnails {
 593         // to hide thumbnails, use a no-data URI
 594         r.Feed.ImageLink = `data,`
 595         return
 596     }
 597 
 598     if !cfg.Inline {
 599         // if asked to, keep images as externally-linked resources
 600         return
 601     }
 602 
 603     // read image thumbnail
 604     b, err = slurp(f.ImageLink)
 605     if err != nil {
 606         r.Problem = err
 607         return
 608     }
 609 
 610     mime := `image/jpeg`
 611     if strings.Contains(f.ImageLink, `.png`) {
 612         mime = `image/png`
 613     }
 614 
 615     // data-URI-encode thumbnail, so it's part of the resulting webpage
 616     s, err := makeDataURI(b, mime)
 617     if err != nil {
 618         r.Problem = err
 619         return
 620     }
 621     r.Feed.ImageLink = s
 622 }
 623 
 624 func slurp(uri string) ([]byte, error) {
 625     resp, err := http.Get(uri)
 626     if err != nil {
 627         return nil, err
 628     }
 629     defer resp.Body.Close()
 630     return io.ReadAll(resp.Body)
 631 }