File: podfeed/atom.go
   1 package main
   2 
   3 import (
   4     "bytes"
   5     "encoding/xml"
   6 )
   7 
   8 // atomFeed is an atom-format XML/RSS document: all its useful info is in the
   9 // Channels field, which is an array with usually only 1 item
  10 type atomFeed struct {
  11     Atom     string        `xml:"atom"`
  12     CC       string        `xml:"cc"`
  13     Channels []atomChannel `xml:"channel"`
  14     Content  string        `xml:"content"`
  15     Media    string        `xml:"media"`
  16     Version  int           `xml:"version"`
  17 }
  18 
  19 // atomChannel has all the channel tags in an atom-format document: its most
  20 // useful info is in its Items array field
  21 type atomChannel struct {
  22     Author          string     `xml:"author"`
  23     Description     string     `xml:"description"`
  24     Docs            string     `xml:"docs"`
  25     Explicit        string     `xml:"explicit"`
  26     Image           atomImage  `xml:"image"`
  27     Items           []atomItem `xml:"item"`
  28     Language        string     `xml:"language"`
  29     Link            string     `xml:"link"`
  30     PublicationDate string     `xml:"pubDate"`
  31     Summary         string     `xml:"summary"`
  32     Title           string     `xml:"title"`
  33     Subtitle        string     `xml:"subtitle"`
  34 
  35     // Copyright string `xml:"copyright"`
  36     // Generator string `xml:"generator"`
  37     // Categories     []string
  38     // Image          []string
  39     // Owner          []string
  40     // ManagingEditor string
  41     // LastBuildDate  string
  42     // Type           string
  43 }
  44 
  45 // atomImage is a channel's thumbnail image/logo
  46 type atomImage struct {
  47     Title string `xml:"title"`
  48     URL   string `xml:"url"`
  49 }
  50 
  51 // atomItem is a link to a podcast episode or to an article
  52 type atomItem struct {
  53     Author      string `xml:"author"`
  54     Description string `xml:"description"`
  55     Duration    string `xml:"duration"` // media-duration as hh:mm:ss
  56 
  57     Enclosures []atomEnclosure `xml:"enclosure"`
  58 
  59     Episode         int    `xml:"episode"`
  60     Explicit        string `xml:"explicit"`
  61     PublicationDate string `xml:"pubDate"`
  62     Summary         string `xml:"summary"`
  63     Title           string `xml:"title"`
  64 
  65     // Keywords []string // not sure these array items are strings
  66 }
  67 
  68 // atomEnclosure is an item's link, along with some useful metadata
  69 type atomEnclosure struct {
  70     Length int    `xml:"length"` // seems to be the media filesize
  71     Type   string `xml:"type"`   // MIME type for the media file
  72     URL    string `xml:"url"`    // the URL for the media file
  73 
  74     // special iTunes attributes
  75     AttrLength   int    `xml:"length,attr"`
  76     AttrType     string `xml:"type,attr"`
  77     AttrURL      string `xml:"url,attr"`
  78     AttrDuration string `xml:"duration,attr"`
  79 }
  80 
  81 // parseAtom decodes podcast/feed info from the bytes given
  82 func parseAtom(b []byte) (atomFeed, error) {
  83     var wrap atomFeed
  84     if !bytes.Contains(b, []byte(`itunes:`)) {
  85         err := xml.Unmarshal(b, &wrap)
  86         return wrap, err
  87     }
  88 
  89     b = bytes.ReplaceAll(b, []byte(`<itunes:`), []byte{'<'})
  90     b = bytes.ReplaceAll(b, []byte(`</itunes:`), []byte{'<', '/'})
  91     err := xml.Unmarshal(b, &wrap)
  92     return wrap, err
  93 }

     File: podfeed/config.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "flag"
   6     "fmt"
   7     "io"
   8     "os"
   9     "strings"
  10 )
  11 
  12 const (
  13     titleUsage      = `title for the HTML result`
  14     itemLimitUsage  = `max items shown per feed, starting from latest; negative to disable`
  15     thumbnailsUsage = `show channel/podcast thumbnails`
  16     inlineUsage     = `inline/embed thumbnails as base64 data`
  17 )
  18 
  19 // config has all the cmd-line options: each has its own default value, but
  20 // can be explicitly set via one of the cmd-line flags
  21 type config struct {
  22     Feeds      []string
  23     Title      string
  24     ItemLimit  int
  25     Thumbnails bool
  26     Inline     bool
  27 }
  28 
  29 func parseFlags(usage string) config {
  30     cfg := config{
  31         Title:      `Latest Podcast Episodes`,
  32         ItemLimit:  -1,
  33         Thumbnails: true,
  34         Inline:     true,
  35     }
  36 
  37     flag.Usage = func() {
  38         fmt.Fprintf(flag.CommandLine.Output(), "%s\n\nOptions\n\n", usage)
  39         flag.PrintDefaults()
  40     }
  41     flag.StringVar(&cfg.Title, `title`, cfg.Title, titleUsage)
  42     flag.IntVar(&cfg.ItemLimit, `max`, cfg.ItemLimit, itemLimitUsage)
  43     flag.BoolVar(&cfg.Thumbnails, `thumbs`, cfg.Thumbnails, thumbnailsUsage)
  44     flag.BoolVar(&cfg.Inline, `inline`, cfg.Inline, inlineUsage)
  45     flag.Parse()
  46 
  47     for _, a := range flag.Args() {
  48         if strings.HasPrefix(a, `https://`) || strings.HasPrefix(a, `http://`) {
  49             // it's a URI feed
  50             cfg.Feeds = append(cfg.Feeds, a)
  51             continue
  52         }
  53 
  54         // it's a text file with feed URIs, one per line
  55         lines, err := slurpFileLines(a)
  56         if err != nil {
  57             fmt.Fprintln(os.Stderr, err.Error())
  58             continue
  59         }
  60         cfg.Feeds = append(cfg.Feeds, lines...)
  61     }
  62 
  63     // if not given any filenames/URIs, read URIs from stdin
  64     if flag.NArg() == 0 {
  65         lines, err := slurpLines(os.Stdin)
  66         if err == nil {
  67             cfg.Feeds = append(cfg.Feeds, lines...)
  68         } else {
  69             fmt.Fprintln(os.Stderr, err.Error())
  70         }
  71     }
  72 
  73     return cfg
  74 }
  75 
  76 func slurpFileLines(fname string) ([]string, error) {
  77     f, err := os.Open(fname)
  78     if err != nil {
  79         return nil, err
  80     }
  81     defer f.Close()
  82     return slurpLines(f)
  83 }
  84 
  85 func slurpLines(r io.Reader) ([]string, error) {
  86     var lines []string
  87     const maxbufsize = 8 * 1024 * 1024 * 1024
  88     sc := bufio.NewScanner(r)
  89     sc.Buffer(nil, maxbufsize)
  90 
  91     for sc.Scan() {
  92         err := sc.Err()
  93         if err != nil {
  94             return lines, err
  95         }
  96 
  97         s := strings.TrimSpace(sc.Text())
  98         // ignore empty lines and comment lines
  99         if s == `` || strings.HasPrefix(s, `#`) {
 100             continue
 101         }
 102 
 103         lines = append(lines, s)
 104     }
 105 
 106     return lines, nil
 107 }

     File: podfeed/feeds.go
   1 package main
   2 
   3 import (
   4     "errors"
   5     "fmt"
   6     "io"
   7     "strconv"
   8     "strings"
   9     "time"
  10 )
  11 
  12 // parseFeed takes raw RSS-string bytes and makes a feed object out of them
  13 func parseFeed(b []byte) (feed, error) {
  14     atom, err := parseAtom(b)
  15     if err != io.EOF && err != nil {
  16         return feed{}, err
  17     }
  18 
  19     if len(atom.Channels) == 0 {
  20         return feed{}, errors.New(`feed has no channels`)
  21     }
  22     if len(atom.Channels) > 1 {
  23         const msg = `multiple channels in a single feed aren't supported`
  24         return feed{}, errors.New(msg)
  25     }
  26 
  27     var feed feed
  28     ch := atom.Channels[0]
  29     feed.Title = ch.Title
  30     feed.Link = strings.Replace(ch.Link, `http://`, `https://`, 1)
  31     feed.ImageLink = ch.Image.URL
  32     feed.Description = clean(ch.Description)
  33 
  34     for _, v := range ch.Items {
  35         if len(v.Enclosures) == 0 {
  36             continue
  37         }
  38         feed.Items = append(feed.Items, adaptItem(v))
  39     }
  40     return feed, nil
  41 }
  42 
  43 // feed is a template-friendly representation of a parsed podcast feed
  44 type feed struct {
  45     Title       string
  46     Link        string
  47     ImageLink   string
  48     Description string
  49 
  50     Items []item
  51 }
  52 
  53 // item is a template-friendly representation of a podcast episode
  54 type item struct {
  55     Title       string
  56     Link        string
  57     Tooltip     string
  58     Description string
  59 }
  60 
  61 // adaptItem makes a podcast episodes's info more template-friendly
  62 func adaptItem(v atomItem) item {
  63     tooltip := ``
  64     duration := v.Duration
  65     // if duration is in seconds, turn it into the hh:mm:ss format
  66     if !strings.Contains(duration, `:`) {
  67         n, err := strconv.Atoi(duration)
  68         if err == nil && n > 0 {
  69             duration = (time.Duration(n) * time.Second).String()
  70         }
  71     }
  72 
  73     if duration != `` && v.PublicationDate != `` {
  74         const fs = `published: %s | duration: %s`
  75         tooltip = fmt.Sprintf(fs, v.PublicationDate, duration)
  76     }
  77     if duration == `` && v.PublicationDate != `` {
  78         tooltip = fmt.Sprintf(`published: %s`, v.PublicationDate)
  79     }
  80     if duration != `` && v.PublicationDate == `` {
  81         tooltip = fmt.Sprintf(`duration: %s`, v.PublicationDate)
  82     }
  83 
  84     enc := v.Enclosures[0]
  85     return item{
  86         Title:       v.Title,
  87         Link:        notEmptyOr(enc.URL, enc.AttrURL),
  88         Tooltip:     tooltip,
  89         Description: clean(v.Description),
  90     }
  91 }

     File: podfeed/fetch.go
   1 package main
   2 
   3 import (
   4     "io"
   5     "net/http"
   6     "runtime"
   7     "strings"
   8     "sync"
   9 )
  10 
  11 // Result is the payload/error combo resulting from trying to fetch a feed.
  12 type Result struct {
  13     Index int
  14     URI   string
  15 
  16     Feed    feed
  17     Problem error
  18 }
  19 
  20 // fetch tries to fetch all podcast feeds concurrently, to save time
  21 func fetch(cfg config) []Result {
  22     var wg sync.WaitGroup
  23     wg.Add(len(cfg.Feeds))
  24 
  25     // start rate-limiter up to the # of CPUs
  26     tickets := make(chan int, runtime.NumCPU())
  27     go func() {
  28         for i := range cfg.Feeds {
  29             tickets <- i
  30         }
  31 
  32         // wait until fetcher loop below has finished dispatching all tasks
  33         wg.Wait()
  34         close(tickets) // quit the fetcher loop
  35     }()
  36 
  37     // setup parameters and final results array
  38     res := make([]Result, len(cfg.Feeds))
  39     for i, uri := range cfg.Feeds {
  40         res[i] = Result{Index: i, URI: uri, Feed: feed{}, Problem: nil}
  41     }
  42 
  43     // concurrently fetch feeds
  44     for i := range tickets {
  45         go fetchItem(&res[i], &wg, cfg)
  46     }
  47     return res
  48 }
  49 
  50 // fetchItem is concurrently called/dispatched to try to fetch and decode a
  51 // single podcast feed: any error along the way is remembered as part of the
  52 // result, so the user can later be told about it
  53 func fetchItem(r *Result, wg *sync.WaitGroup, cfg config) {
  54     defer wg.Done()
  55 
  56     // read RSS feed
  57     b, err := slurp(r.URI)
  58     if err != nil {
  59         r.Problem = err
  60         return
  61     }
  62 
  63     // extract most important RSS info
  64     f, err := parseFeed(b)
  65     if err != nil {
  66         r.Problem = err
  67         return
  68     }
  69     // r.Feed = newFeed(f)
  70     r.Feed = f
  71 
  72     if !cfg.Thumbnails {
  73         // to hide thumbnails, use a no-data URI
  74         r.Feed.ImageLink = `data,`
  75         return
  76     }
  77 
  78     if !cfg.Inline {
  79         // if asked to, keep images as externally-linked resources
  80         return
  81     }
  82 
  83     // read image thumbnail
  84     b, err = slurp(f.ImageLink)
  85     if err != nil {
  86         r.Problem = err
  87         return
  88     }
  89 
  90     mime := `image/jpeg`
  91     if strings.Contains(f.ImageLink, `.png`) {
  92         mime = `image/png`
  93     }
  94 
  95     // data-URI-encode thumbnail, so it's part of the resulting webpage
  96     s, err := makeDataURI(b, mime)
  97     if err != nil {
  98         r.Problem = err
  99         return
 100     }
 101     r.Feed.ImageLink = s
 102 }
 103 
 104 func slurp(uri string) ([]byte, error) {
 105     resp, err := http.Get(uri)
 106     if err != nil {
 107         return nil, err
 108     }
 109     defer resp.Body.Close()
 110     return io.ReadAll(resp.Body)
 111 }

     File: podfeed/go.mod
   1 module podfeed
   2 
   3 go 1.18

     File: podfeed/info.txt
   1 podfeed [URIs/filenames...]
   2 
   3 Keep track of what's on multiple podcasts/RSS feeds with auto-popup links and
   4 collapsible descriptions.
   5 
   6 After fetching all RSS feeds, this program emits script-free HTML code for a
   7 standalone webpage with links to all feed items, each having expandable
   8 descriptions.
   9 
  10 The cmd-line arguments can be a mix of direct URIs to podcast/RSS feeds and
  11 filenames: in any files given, each line is taken as a URI to check, unless
  12 the line is empty or starts with #, which marks it as a comment line.

     File: podfeed/main.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "fmt"
   6     "html/template"
   7     "os"
   8 
   9     _ "embed"
  10 )
  11 
  12 //go:embed template.html
  13 var src string
  14 
  15 //go:embed info.txt
  16 var usage string
  17 
  18 // enable inlining/embedding thumbnails into page
  19 var funcs = template.FuncMap{
  20     `url`: func(s string) template.URL {
  21         return template.URL(s)
  22     },
  23 }
  24 
  25 var pageTemplate = template.Must(template.New(`main`).Funcs(funcs).Parse(src))
  26 
  27 // result is the payload given to the page template
  28 type result struct {
  29     Title string
  30     Feeds []feed
  31 }
  32 
  33 func main() {
  34     cfg := parseFlags(usage)
  35 
  36     // fetch feeds concurrently
  37     res := fetch(cfg)
  38 
  39     // show which podcasts/feeds caused problems, and keep only the ones
  40     // which were loaded successfully
  41     page := result{Title: cfg.Title}
  42     for _, v := range res {
  43         if v.Problem != nil {
  44             fmt.Fprintln(os.Stderr, v.Problem.Error())
  45             continue
  46         }
  47 
  48         // limit feed's item-length, unless length-limiting was disabled via
  49         // a negative value
  50         if cfg.ItemLimit >= 0 && len(v.Feed.Items) > cfg.ItemLimit {
  51             v.Feed.Items = v.Feed.Items[:cfg.ItemLimit]
  52         }
  53         page.Feeds = append(page.Feeds, v.Feed)
  54     }
  55 
  56     // render HTML result to standard output
  57     w := bufio.NewWriter(os.Stdout)
  58     defer w.Flush()
  59     pageTemplate.Execute(w, page)
  60 }

     File: podfeed/strings.go
   1 package main
   2 
   3 import (
   4     "encoding/base64"
   5     "fmt"
   6     "math"
   7     "regexp"
   8     "strings"
   9 )
  10 
  11 // note: the tag matcher can't rid anchor tags of inner tags in its content
  12 const tagRE = `</?[a-z][a-z1-6]*( +[a-z]+ *= *"[a-z A-Z0-9-]*")*( /)?>`
  13 
  14 // regex to match opening/closing HTML tags, used in function `clean`; the
  15 // first letter explicitly excludes a, to avoid matching/replacing anchor tags
  16 var tagMatcher = regexp.MustCompile(tagRE)
  17 
  18 // regex to match ampersand escapes, used in function `clean`
  19 var ampersandMatcher = regexp.MustCompile(`&[a-zA-Z]+;`)
  20 
  21 var ampersandEscapes = map[string]string{
  22     `&nbsp`: ` `,
  23     `&amp`:  `&`,
  24     `&lt`:   `<`,
  25     `&gt`:   `>`,
  26 }
  27 
  28 // clean improves the content of descriptions, by removing typical markup
  29 // junk often found in RSS feeds
  30 func clean(s string) string {
  31     s = tagMatcher.ReplaceAllStringFunc(s, func(s string) string {
  32         if strings.HasPrefix(s, `<a `) {
  33             return s
  34         }
  35 
  36         switch s {
  37         case `</a>`:
  38             return `</a>`
  39         case `<br/>`, `<br />`:
  40             return "\n"
  41         default:
  42             return ``
  43         }
  44     })
  45 
  46     s = ampersandMatcher.ReplaceAllStringFunc(s, func(s string) string {
  47         sub, ok := ampersandEscapes[s]
  48         if ok {
  49             return sub
  50         }
  51         return s
  52     })
  53 
  54     return s
  55 }
  56 
  57 // makeDataURI encodes the bytes given into a MIME-typed base64-encoded URI
  58 func makeDataURI(b []byte, mime string) (string, error) {
  59     var buf strings.Builder
  60     base64len := int(math.Ceil(4 * float64(len(b)) / 3))
  61     buf.Grow(len(`data:`) + len(mime) + len(`;base64,`) + base64len)
  62     fmt.Fprintf(&buf, `data:%s;base64,`, mime)
  63 
  64     enc := base64.NewEncoder(base64.StdEncoding, &buf)
  65     defer enc.Close()
  66 
  67     _, err := enc.Write(b)
  68     if err != nil {
  69         return ``, err
  70     }
  71     return buf.String(), nil
  72 }
  73 
  74 // notEmptyOr simplifies control flow around this app
  75 func notEmptyOr(s, fallback string) string {
  76     if len(s) > 0 {
  77         return s
  78     }
  79     return fallback
  80 }

     File: podfeed/template.html
   1 <!DOCTYPE html>
   2 <html lang="en">
   3 
   4 <head>
   5     <meta charset="UTF-8">
   6     <meta name="viewport" content="width=device-width, initial-scale=1.0">
   7     <link rel="icon" href="data:,">
   8     <title>{{ .Title }}</title>
   9     <style>
  10         body {
  11             font-size: 0.9rem;
  12             margin: 0 0 2rem 0;
  13             font-family: system-ui, -apple-system, sans-serif;
  14         }
  15 
  16         main {
  17             margin: auto;
  18             display: flex;
  19             width: fit-content;
  20         }
  21 
  22         h1 {
  23             top: 0;
  24             position: sticky;
  25             font-size: 0.9rem;
  26             text-align: center;
  27             background-color: white;
  28         }
  29 
  30         img {
  31             margin: auto;
  32             margin-bottom: 1rem;
  33             display: block;
  34             max-width: 15ch;
  35         }
  36 
  37         section {
  38             width: 48ch;
  39             padding: 0.3rem;
  40             margin: 0 0.1rem;
  41         }
  42 
  43         section:nth-child(2n+1) {
  44             background-color: #eee;
  45         }
  46 
  47         a {
  48             color: steelblue;
  49             text-decoration: none;
  50         }
  51 
  52         details p {
  53             line-height: 1.3rem;
  54         }
  55 
  56     </style>
  57 </head>
  58 
  59 <body>
  60     <main>
  61         {{- range .Feeds }}
  62         <article>
  63             <h1><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></h1>
  64             <img src="{{ .ImageLink | url }}">
  65             {{- range .Items }}
  66             <section>
  67                 <details>
  68                     <summary title="{{ .Tooltip }}"><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></summary>
  69                     <p>{{ .Description }}</p>
  70                 </details>
  71             </section>
  72             {{- end }}
  73         </article>
  74         {{- end }}
  75     </main>
  76 </body>
  77 
  78 </html>

     File: podfeed/testdata/mit-license.txt
   1 The MIT License (MIT)
   2 
   3 Copyright © 2024 pacman64
   4 
   5 Permission is hereby granted, free of charge, to any person obtaining a copy of
   6 this software and associated documentation files (the “Software”), to deal
   7 in the Software without restriction, including without limitation the rights to
   8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9 of the Software, and to permit persons to whom the Software is furnished to do
  10 so, subject to the following conditions:
  11 
  12 The above copyright notice and this permission notice shall be included in all
  13 copies or substantial portions of the Software.
  14 
  15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21 SOFTWARE.