File: podfeed.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for podfeed.
  27 
  28 To compile a smaller-sized command-line app, you can use the `go` command as
  29 follows:
  30 
  31 go build -ldflags "-s -w" -trimpath podfeed.go
  32 */
  33 
  34 package main
  35 
  36 import (
  37     "bufio"
  38     "bytes"
  39     "encoding/base64"
  40     "encoding/xml"
  41     "errors"
  42     "flag"
  43     "fmt"
  44     "html/template"
  45     "io"
  46     "math"
  47     "net/http"
  48     "os"
  49     "regexp"
  50     "runtime"
  51     "strconv"
  52     "strings"
  53     "sync"
  54     "time"
  55 )
  56 
// src is the source of the HTML template for the whole output page: it shows
// the page title, then one article per feed, each with a linked title, a
// thumbnail image, and a collapsible section per feed item; the newline right
// after the opening backtick is skipped when the template is parsed
const src = `
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="icon" href="data:,">
    <title>{{ .Title }}</title>
    <style>
        body {
            font-size: 0.9rem;
            margin: 0 0 2rem 0;
            font-family: system-ui, -apple-system, sans-serif;
        }

        main {
            margin: auto;
            display: flex;
            width: fit-content;
        }

        h1 {
            top: 0;
            position: sticky;
            font-size: 0.9rem;
            text-align: center;
            background-color: white;
        }

        img {
            margin: auto;
            margin-bottom: 1rem;
            display: block;
            max-width: 15ch;
        }

        section {
            width: 48ch;
            padding: 0.3rem;
            margin: 0 0.1rem;
        }

        section:nth-child(2n+1) {
            background-color: #eee;
        }

        a {
            color: steelblue;
            text-decoration: none;
        }

        details p {
            line-height: 1.3rem;
        }

    </style>
</head>

<body>
    <main>
        {{- range .Feeds }}
        <article>
            <h1><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></h1>
            <img src="{{ .ImageLink | url }}">
            {{- range .Items }}
            <section>
                <details>
                    <summary title="{{ .Tooltip }}"><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></summary>
                    <p>{{ .Description }}</p>
                </details>
            </section>
            {{- end }}
        </article>
        {{- end }}
    </main>
</body>

</html>
`
 137 
// usage is the main help message for the app: the cmd-line flag parser shows
// it right before the descriptions of all options; the newline right after
// the opening backtick is skipped by function main
const usage = `
podfeed [URIs/filenames...]

Keep track of what's on multiple podcasts/RSS feeds with auto-popup links and
collapsible descriptions.

After fetching all RSS feeds, this program emits script-free HTML code for a
standalone webpage with links to all feed items, each having expandable
descriptions.

The cmd-line arguments can be a mix of direct URIs to podcast/RSS feeds and
filenames: in any files given, each line is taken as a URI to check, unless
the line is empty or starts with #, which marks it as a comment line.
`
 152 
 153 // enable inlining/embedding thumbnails into page
 154 var funcs = template.FuncMap{
 155     `url`: func(s string) template.URL {
 156         return template.URL(s)
 157     },
 158 }
 159 
// pageTemplate is the page template, parsed once and ready to execute: the
// custom funcs are registered before parsing, the first byte (a newline) of
// the template source is skipped, and template.Must panics at startup if the
// source can't be parsed
var pageTemplate = template.Must(template.New(`main`).Funcs(funcs).Parse(src[1:]))
 161 
// result is the payload given to the page template
type result struct {
    // Title is the title for the whole page
    Title string
    // Feeds has all the feeds to show, each one becoming an article
    Feeds []feed
}
 167 
 168 func main() {
 169     cfg := parseFlags(usage[1:])
 170 
 171     // fetch feeds concurrently
 172     res := fetch(cfg)
 173 
 174     // show which podcasts/feeds caused problems, and keep only the ones
 175     // which were loaded successfully
 176     page := result{Title: cfg.Title}
 177     for _, v := range res {
 178         if v.Problem != nil {
 179             fmt.Fprintln(os.Stderr, v.Problem.Error())
 180             continue
 181         }
 182 
 183         // limit feed's item-length, unless length-limiting was disabled via
 184         // a negative value
 185         if cfg.ItemLimit >= 0 && len(v.Feed.Items) > cfg.ItemLimit {
 186             v.Feed.Items = v.Feed.Items[:cfg.ItemLimit]
 187         }
 188         page.Feeds = append(page.Feeds, v.Feed)
 189     }
 190 
 191     // render HTML result to standard output
 192     w := bufio.NewWriter(os.Stdout)
 193     defer w.Flush()
 194     pageTemplate.Execute(w, page)
 195 }
 196 
// descriptions for the cmd-line options, shown as part of the help message
const (
    titleUsage      = `title for the HTML result`
    itemLimitUsage  = `max items shown per feed, starting from latest; negative to disable`
    thumbnailsUsage = `show channel/podcast thumbnails`
    inlineUsage     = `inline/embed thumbnails as base64 data`
)
 203 
// config has all the cmd-line options: each has its own default value, but
// can be explicitly set via one of the cmd-line flags
type config struct {
    // Feeds has the URIs of all feeds to fetch: these come from cmd-line
    // arguments, from lines in the files named by them, or from stdin
    Feeds      []string
    // Title is the title for the resulting webpage
    Title      string
    // ItemLimit is the max number of items shown per feed: a negative value
    // disables the limit
    ItemLimit  int
    // Thumbnails controls whether feed thumbnails are shown
    Thumbnails bool
    // Inline controls whether thumbnails are fetched and embedded in the
    // page as base64 data, instead of being linked to
    Inline     bool
}
 213 
 214 func parseFlags(usage string) config {
 215     cfg := config{
 216         Title:      `Latest Podcast Episodes`,
 217         ItemLimit:  -1,
 218         Thumbnails: true,
 219         Inline:     true,
 220     }
 221 
 222     flag.Usage = func() {
 223         fmt.Fprintf(flag.CommandLine.Output(), "%s\n\nOptions\n\n", usage)
 224         flag.PrintDefaults()
 225     }
 226     flag.StringVar(&cfg.Title, `title`, cfg.Title, titleUsage)
 227     flag.IntVar(&cfg.ItemLimit, `max`, cfg.ItemLimit, itemLimitUsage)
 228     flag.BoolVar(&cfg.Thumbnails, `thumbs`, cfg.Thumbnails, thumbnailsUsage)
 229     flag.BoolVar(&cfg.Inline, `inline`, cfg.Inline, inlineUsage)
 230     flag.Parse()
 231 
 232     for _, a := range flag.Args() {
 233         if strings.HasPrefix(a, `https://`) || strings.HasPrefix(a, `http://`) {
 234             // it's a URI feed
 235             cfg.Feeds = append(cfg.Feeds, a)
 236             continue
 237         }
 238 
 239         // it's a text file with feed URIs, one per line
 240         lines, err := slurpFileLines(a)
 241         if err != nil {
 242             fmt.Fprintln(os.Stderr, err.Error())
 243             continue
 244         }
 245         cfg.Feeds = append(cfg.Feeds, lines...)
 246     }
 247 
 248     // if not given any filenames/URIs, read URIs from stdin
 249     if flag.NArg() == 0 {
 250         lines, err := slurpLines(os.Stdin)
 251         if err == nil {
 252             cfg.Feeds = append(cfg.Feeds, lines...)
 253         } else {
 254             fmt.Fprintln(os.Stderr, err.Error())
 255         }
 256     }
 257 
 258     return cfg
 259 }
 260 
 261 func slurpFileLines(fname string) ([]string, error) {
 262     f, err := os.Open(fname)
 263     if err != nil {
 264         return nil, err
 265     }
 266     defer f.Close()
 267     return slurpLines(f)
 268 }
 269 
 270 func slurpLines(r io.Reader) ([]string, error) {
 271     var lines []string
 272     const maxbufsize = 8 * 1024 * 1024 * 1024
 273     sc := bufio.NewScanner(r)
 274     sc.Buffer(nil, maxbufsize)
 275 
 276     for sc.Scan() {
 277         err := sc.Err()
 278         if err != nil {
 279             return lines, err
 280         }
 281 
 282         s := strings.TrimSpace(sc.Text())
 283         // ignore empty lines and comment lines
 284         if s == `` || strings.HasPrefix(s, `#`) {
 285             continue
 286         }
 287 
 288         lines = append(lines, s)
 289     }
 290 
 291     return lines, nil
 292 }
 293 
 294 // note: the tag matcher can't rid anchor tags of inner tags in its content
 295 const tagRE = `</?[a-z][a-z1-6]*( +[a-z]+ *= *"[a-z A-Z0-9-]*")*( /)?>`
 296 
 297 // regex to match opening/closing HTML tags, used in function `clean`; the
 298 // first letter explicitly excludes a, to avoid matching/replacing anchor tags
 299 var tagMatcher = regexp.MustCompile(tagRE)
 300 
 301 // regex to match ampersand escapes, used in function `clean`
 302 var ampersandMatcher = regexp.MustCompile(`&[a-zA-Z]+;`)
 303 
 304 var ampersandEscapes = map[string]string{
 305     `&nbsp`: ` `,
 306     `&amp`:  `&`,
 307     `&lt`:   `<`,
 308     `&gt`:   `>`,
 309 }
 310 
 311 // clean improves the content of descriptions, by removing typical markup
 312 // junk often found in RSS feeds
 313 func clean(s string) string {
 314     s = tagMatcher.ReplaceAllStringFunc(s, func(s string) string {
 315         if strings.HasPrefix(s, `<a `) {
 316             return s
 317         }
 318 
 319         switch s {
 320         case `</a>`:
 321             return `</a>`
 322         case `<br/>`, `<br />`:
 323             return "\n"
 324         default:
 325             return ``
 326         }
 327     })
 328 
 329     s = ampersandMatcher.ReplaceAllStringFunc(s, func(s string) string {
 330         sub, ok := ampersandEscapes[s]
 331         if ok {
 332             return sub
 333         }
 334         return s
 335     })
 336 
 337     return s
 338 }
 339 
 340 // makeDataURI encodes the bytes given into a MIME-typed base64-encoded URI
 341 func makeDataURI(b []byte, mime string) (string, error) {
 342     var buf strings.Builder
 343     base64len := int(math.Ceil(4 * float64(len(b)) / 3))
 344     buf.Grow(len(`data:`) + len(mime) + len(`;base64,`) + base64len)
 345     fmt.Fprintf(&buf, `data:%s;base64,`, mime)
 346 
 347     enc := base64.NewEncoder(base64.StdEncoding, &buf)
 348     defer enc.Close()
 349 
 350     _, err := enc.Write(b)
 351     if err != nil {
 352         return ``, err
 353     }
 354     return buf.String(), nil
 355 }
 356 
 357 // notEmptyOr simplifies control flow around this app
 358 func notEmptyOr(s, fallback string) string {
 359     if len(s) > 0 {
 360         return s
 361     }
 362     return fallback
 363 }
 364 
 365 // parseFeed takes raw RSS-string bytes and makes a feed object out of them
 366 func parseFeed(b []byte) (feed, error) {
 367     atom, err := parseAtom(b)
 368     if err != io.EOF && err != nil {
 369         return feed{}, err
 370     }
 371 
 372     if len(atom.Channels) == 0 {
 373         return feed{}, errors.New(`feed has no channels`)
 374     }
 375     if len(atom.Channels) > 1 {
 376         const msg = `multiple channels in a single feed aren't supported`
 377         return feed{}, errors.New(msg)
 378     }
 379 
 380     var feed feed
 381     ch := atom.Channels[0]
 382     feed.Title = ch.Title
 383     feed.Link = strings.Replace(ch.Link, `http://`, `https://`, 1)
 384     feed.ImageLink = ch.Image.URL
 385     feed.Description = clean(ch.Description)
 386 
 387     for _, v := range ch.Items {
 388         if len(v.Enclosures) == 0 {
 389             continue
 390         }
 391         feed.Items = append(feed.Items, adaptItem(v))
 392     }
 393     return feed, nil
 394 }
 395 
// feed is a template-friendly representation of a parsed podcast feed
type feed struct {
    // Title is the name of the channel/podcast
    Title       string
    // Link is the channel's own link, with its first http:// turned into
    // https:// when the feed is parsed
    Link        string
    // ImageLink is where the channel's thumbnail is: depending on the
    // cmd-line options, it can end up replaced with a data-URI
    ImageLink   string
    // Description is the channel's description, after being cleaned
    Description string

    // Items has the feed's entries, in the same order as the parsed document
    Items []item
}
 405 
// item is a template-friendly representation of a podcast episode
type item struct {
    // Title is the episode's title
    Title       string
    // Link is the URL from the episode's first enclosure
    Link        string
    // Tooltip has the publication date and/or the duration, when available
    Tooltip     string
    // Description is the episode's description, after being cleaned
    Description string
}
 413 
 414 // adaptItem makes a podcast episodes's info more template-friendly
 415 func adaptItem(v atomItem) item {
 416     tooltip := ``
 417     duration := v.Duration
 418     // if duration is in seconds, turn it into the hh:mm:ss format
 419     if !strings.Contains(duration, `:`) {
 420         n, err := strconv.Atoi(duration)
 421         if err == nil && n > 0 {
 422             duration = (time.Duration(n) * time.Second).String()
 423         }
 424     }
 425 
 426     if duration != `` && v.PublicationDate != `` {
 427         const fs = `published: %s | duration: %s`
 428         tooltip = fmt.Sprintf(fs, v.PublicationDate, duration)
 429     }
 430     if duration == `` && v.PublicationDate != `` {
 431         tooltip = fmt.Sprintf(`published: %s`, v.PublicationDate)
 432     }
 433     if duration != `` && v.PublicationDate == `` {
 434         tooltip = fmt.Sprintf(`duration: %s`, v.PublicationDate)
 435     }
 436 
 437     enc := v.Enclosures[0]
 438     return item{
 439         Title:       v.Title,
 440         Link:        notEmptyOr(enc.URL, enc.AttrURL),
 441         Tooltip:     tooltip,
 442         Description: clean(v.Description),
 443     }
 444 }
 445 
// atomFeed is the top-level value a feed's XML document is decoded into: all
// its useful info is in the Channels field, which is an array with usually
// only 1 item
//
// NOTE(review): none of the tags below use `,attr`, so all fields are matched
// against child elements, not attributes of the top-level tag; only Channels
// is read by the rest of this file
type atomFeed struct {
    Atom     string        `xml:"atom"`
    CC       string        `xml:"cc"`
    Channels []atomChannel `xml:"channel"`
    Content  string        `xml:"content"`
    Media    string        `xml:"media"`
    Version  int           `xml:"version"`
}
 456 
// atomChannel has the channel tags decoded from a feed document: its most
// useful info is in its Items array field; function parseFeed only reads its
// Title, Link, Image, Description, and Items fields
type atomChannel struct {
    Author          string     `xml:"author"`
    Description     string     `xml:"description"`
    Docs            string     `xml:"docs"`
    Explicit        string     `xml:"explicit"`
    Image           atomImage  `xml:"image"`
    Items           []atomItem `xml:"item"`
    Language        string     `xml:"language"`
    Link            string     `xml:"link"`
    PublicationDate string     `xml:"pubDate"`
    Summary         string     `xml:"summary"`
    Title           string     `xml:"title"`
    Subtitle        string     `xml:"subtitle"`

    // the fields below are disabled, and so aren't decoded

    // Copyright string `xml:"copyright"`
    // Generator string `xml:"generator"`
    // Categories     []string
    // Image          []string
    // Owner          []string
    // ManagingEditor string
    // LastBuildDate  string
    // Type           string
}
 482 
// atomImage is a channel's thumbnail image/logo: its URL field is what ends
// up as the feed's image link
type atomImage struct {
    Title string `xml:"title"`
    URL   string `xml:"url"`
}
 488 
// atomItem is a link to a podcast episode or to an article: function
// adaptItem reads its Title, Description, Duration, PublicationDate, and
// Enclosures fields
type atomItem struct {
    Author      string `xml:"author"`
    Description string `xml:"description"`
    // media-duration, either with colons (presumably as hh:mm:ss) or as a
    // plain count of seconds: function adaptItem handles both cases
    Duration    string `xml:"duration"`

    // Enclosures has the item's links: items without any are skipped when
    // parsing feeds, and only the first one is used otherwise
    Enclosures []atomEnclosure `xml:"enclosure"`

    Episode         int    `xml:"episode"`
    Explicit        string `xml:"explicit"`
    PublicationDate string `xml:"pubDate"`
    Summary         string `xml:"summary"`
    Title           string `xml:"title"`

    // Keywords []string // not sure these array items are strings
}
 505 
// atomEnclosure is an item's link, along with some useful metadata: values
// are decoded both from child elements and from attributes of the enclosure
// tag, and the URL from the child element is preferred when picking a link
type atomEnclosure struct {
    Length int    `xml:"length"` // seems to be the media filesize
    Type   string `xml:"type"`   // MIME type for the media file
    URL    string `xml:"url"`    // the URL for the media file

    // same kind of info as above, but given as attributes of the tag
    // NOTE(review): originally described as special iTunes attributes;
    // confirm which feeds give these as attributes
    AttrLength   int    `xml:"length,attr"`
    AttrType     string `xml:"type,attr"`
    AttrURL      string `xml:"url,attr"`
    AttrDuration string `xml:"duration,attr"`
}
 518 
 519 // parseAtom decodes podcast/feed info from the bytes given
 520 func parseAtom(b []byte) (atomFeed, error) {
 521     var wrap atomFeed
 522     if !bytes.Contains(b, []byte(`itunes:`)) {
 523         err := xml.Unmarshal(b, &wrap)
 524         return wrap, err
 525     }
 526 
 527     b = bytes.ReplaceAll(b, []byte(`<itunes:`), []byte{'<'})
 528     b = bytes.ReplaceAll(b, []byte(`</itunes:`), []byte{'<', '/'})
 529     err := xml.Unmarshal(b, &wrap)
 530     return wrap, err
 531 }
 532 
// Result is the payload/error combo resulting from trying to fetch a feed.
type Result struct {
    // Index is the position of the feed's URI among all the URIs to fetch
    Index int
    // URI is where the feed is fetched from
    URI   string

    // Feed is the parsed feed: function main only uses it when Problem is nil
    Feed    feed
    // Problem is the error which prevented using the feed, if any
    Problem error
}
 541 
 542 // fetch tries to fetch all podcast feeds concurrently, to save time
 543 func fetch(cfg config) []Result {
 544     var wg sync.WaitGroup
 545     wg.Add(len(cfg.Feeds))
 546 
 547     // start rate-limiter up to the # of CPUs
 548     tickets := make(chan int, runtime.NumCPU())
 549     go func() {
 550         for i := range cfg.Feeds {
 551             tickets <- i
 552         }
 553 
 554         // wait until fetcher loop below has finished dispatching all tasks
 555         wg.Wait()
 556         close(tickets) // quit the fetcher loop
 557     }()
 558 
 559     // setup parameters and final results array
 560     res := make([]Result, len(cfg.Feeds))
 561     for i, uri := range cfg.Feeds {
 562         res[i] = Result{Index: i, URI: uri, Feed: feed{}, Problem: nil}
 563     }
 564 
 565     // concurrently fetch feeds
 566     for i := range tickets {
 567         go fetchItem(&res[i], &wg, cfg)
 568     }
 569     return res
 570 }
 571 
 572 // fetchItem is concurrently called/dispatched to try to fetch and decode a
 573 // single podcast feed: any error along the way is remembered as part of the
 574 // result, so the user can later be told about it
 575 func fetchItem(r *Result, wg *sync.WaitGroup, cfg config) {
 576     defer wg.Done()
 577 
 578     // read RSS feed
 579     b, err := slurp(r.URI)
 580     if err != nil {
 581         r.Problem = err
 582         return
 583     }
 584 
 585     // extract most important RSS info
 586     f, err := parseFeed(b)
 587     if err != nil {
 588         r.Problem = err
 589         return
 590     }
 591     // r.Feed = newFeed(f)
 592     r.Feed = f
 593 
 594     if !cfg.Thumbnails {
 595         // to hide thumbnails, use a no-data URI
 596         r.Feed.ImageLink = `data,`
 597         return
 598     }
 599 
 600     if !cfg.Inline {
 601         // if asked to, keep images as externally-linked resources
 602         return
 603     }
 604 
 605     // read image thumbnail
 606     b, err = slurp(f.ImageLink)
 607     if err != nil {
 608         r.Problem = err
 609         return
 610     }
 611 
 612     mime := `image/jpeg`
 613     if strings.Contains(f.ImageLink, `.png`) {
 614         mime = `image/png`
 615     }
 616 
 617     // data-URI-encode thumbnail, so it's part of the resulting webpage
 618     s, err := makeDataURI(b, mime)
 619     if err != nil {
 620         r.Problem = err
 621         return
 622     }
 623     r.Feed.ImageLink = s
 624 }
 625 
 626 func slurp(uri string) ([]byte, error) {
 627     resp, err := http.Get(uri)
 628     if err != nil {
 629         return nil, err
 630     }
 631     defer resp.Body.Close()
 632     return io.ReadAll(resp.Body)
 633 }