File: podfeed.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 Single-file source-code for podfeed. 27 28 To compile a smaller-sized command-line app, you can use the `go` command as 29 follows: 30 31 go build -ldflags "-s -w" -trimpath podfeed.go 32 */ 33 34 package main 35 36 import ( 37 "bufio" 38 "bytes" 39 "encoding/base64" 40 "encoding/xml" 41 "errors" 42 "flag" 43 "fmt" 44 "html/template" 45 "io" 46 "math" 47 "net/http" 48 "os" 49 "regexp" 50 "runtime" 51 "strconv" 52 "strings" 53 "sync" 54 "time" 55 ) 56 57 const src = ` 58 <!DOCTYPE html> 59 <html lang="en"> 60 61 <head> 62 <meta charset="UTF-8"> 63 <meta name="viewport" content="width=device-width, initial-scale=1.0"> 64 <link rel="icon" href="data:,"> 65 <title>{{ .Title }}</title> 66 <style> 67 body { 68 font-size: 0.9rem; 69 margin: 0 0 2rem 0; 70 font-family: system-ui, -apple-system, sans-serif; 71 } 72 73 main { 74 margin: auto; 75 display: flex; 76 width: fit-content; 77 } 78 79 h1 { 80 top: 0; 81 position: sticky; 82 font-size: 0.9rem; 83 text-align: center; 84 background-color: white; 85 } 86 87 img { 88 margin: auto; 89 margin-bottom: 1rem; 90 display: block; 91 max-width: 15ch; 92 } 93 94 section { 95 width: 48ch; 96 padding: 0.3rem; 97 margin: 0 0.1rem; 98 } 99 100 section:nth-child(2n+1) { 101 background-color: #eee; 102 } 103 104 a { 105 color: steelblue; 106 text-decoration: none; 107 } 108 109 details p { 110 line-height: 1.3rem; 111 } 112 113 </style> 114 </head> 115 116 <body> 117 <main> 118 {{- range .Feeds }} 119 <article> 120 <h1><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></h1> 121 <img src="{{ .ImageLink | url }}"> 122 {{- range .Items }} 123 <section> 124 <details> 125 <summary title="{{ .Tooltip }}"><a target="_blank" rel="noreferrer" href="{{ .Link }}">{{ .Title }}</a></summary> 126 <p>{{ .Description }}</p> 127 </details> 128 </section> 129 {{- end }} 130 </article> 131 {{- end }} 132 </main> 133 </body> 134 135 </html> 136 ` 137 138 const usage = ` 139 podfeed [URIs/filenames...] 140 141 Keep track of what's on multiple podcasts/RSS feeds with auto-popup links and 142 collapsible descriptions. 143 144 After fetching all RSS feeds, this program emits script-free HTML code for a 145 standalone webpage with links to all feed items, each having expandable 146 descriptions. 147 148 The cmd-line arguments can be a mix of direct URIs to podcast/RSS feeds and 149 filenames: in any files given, each line is taken as a URI to check, unless 150 the line is empty or starts with #, which marks it as a comment line. 151 ` 152 153 // enable inlining/embedding thumbnails into page 154 var funcs = template.FuncMap{ 155 `url`: func(s string) template.URL { 156 return template.URL(s) 157 }, 158 } 159 160 var pageTemplate = template.Must(template.New(`main`).Funcs(funcs).Parse(src[1:])) 161 162 // result is the payload given to the page template 163 type result struct { 164 Title string 165 Feeds []feed 166 } 167 168 func main() { 169 cfg := parseFlags(usage[1:]) 170 171 // fetch feeds concurrently 172 res := fetch(cfg) 173 174 // show which podcasts/feeds caused problems, and keep only the ones 175 // which were loaded successfully 176 page := result{Title: cfg.Title} 177 for _, v := range res { 178 if v.Problem != nil { 179 fmt.Fprintln(os.Stderr, v.Problem.Error()) 180 continue 181 } 182 183 // limit feed's item-length, unless length-limiting was disabled via 184 // a negative value 185 if cfg.ItemLimit >= 0 && len(v.Feed.Items) > cfg.ItemLimit { 186 v.Feed.Items = v.Feed.Items[:cfg.ItemLimit] 187 } 188 page.Feeds = append(page.Feeds, v.Feed) 189 } 190 191 // render HTML result to standard output 192 w := bufio.NewWriter(os.Stdout) 193 defer w.Flush() 194 pageTemplate.Execute(w, page) 195 } 196 197 const ( 198 titleUsage = `title for the HTML result` 199 itemLimitUsage = `max items shown per feed, starting from latest; negative to disable` 200 thumbnailsUsage = `show channel/podcast thumbnails` 201 inlineUsage = `inline/embed thumbnails as base64 data` 202 ) 203 204 // config has all the cmd-line options: each has its own default value, but 205 // can be explicitly set via one of the cmd-line flags 206 type config struct { 207 Feeds []string 208 Title string 209 ItemLimit int 210 Thumbnails bool 211 Inline bool 212 } 213 214 func parseFlags(usage string) config { 215 cfg := config{ 216 Title: `Latest Podcast Episodes`, 217 ItemLimit: -1, 218 Thumbnails: true, 219 Inline: true, 220 } 221 222 flag.Usage = func() { 223 fmt.Fprintf(flag.CommandLine.Output(), "%s\n\nOptions\n\n", usage) 224 flag.PrintDefaults() 225 } 226 flag.StringVar(&cfg.Title, `title`, cfg.Title, titleUsage) 227 flag.IntVar(&cfg.ItemLimit, `max`, cfg.ItemLimit, itemLimitUsage) 228 flag.BoolVar(&cfg.Thumbnails, `thumbs`, cfg.Thumbnails, thumbnailsUsage) 229 flag.BoolVar(&cfg.Inline, `inline`, cfg.Inline, inlineUsage) 230 flag.Parse() 231 232 for _, a := range flag.Args() { 233 if strings.HasPrefix(a, `https://`) || strings.HasPrefix(a, `http://`) { 234 // it's a URI feed 235 cfg.Feeds = append(cfg.Feeds, a) 236 continue 237 } 238 239 // it's a text file with feed URIs, one per line 240 lines, err := slurpFileLines(a) 241 if err != nil { 242 fmt.Fprintln(os.Stderr, err.Error()) 243 continue 244 } 245 cfg.Feeds = append(cfg.Feeds, lines...) 246 } 247 248 // if not given any filenames/URIs, read URIs from stdin 249 if flag.NArg() == 0 { 250 lines, err := slurpLines(os.Stdin) 251 if err == nil { 252 cfg.Feeds = append(cfg.Feeds, lines...) 253 } else { 254 fmt.Fprintln(os.Stderr, err.Error()) 255 } 256 } 257 258 return cfg 259 } 260 261 func slurpFileLines(fname string) ([]string, error) { 262 f, err := os.Open(fname) 263 if err != nil { 264 return nil, err 265 } 266 defer f.Close() 267 return slurpLines(f) 268 } 269 270 func slurpLines(r io.Reader) ([]string, error) { 271 var lines []string 272 const maxbufsize = 8 * 1024 * 1024 * 1024 273 sc := bufio.NewScanner(r) 274 sc.Buffer(nil, maxbufsize) 275 276 for sc.Scan() { 277 err := sc.Err() 278 if err != nil { 279 return lines, err 280 } 281 282 s := strings.TrimSpace(sc.Text()) 283 // ignore empty lines and comment lines 284 if s == `` || strings.HasPrefix(s, `#`) { 285 continue 286 } 287 288 lines = append(lines, s) 289 } 290 291 return lines, nil 292 } 293 294 // note: the tag matcher can't rid anchor tags of inner tags in its content 295 const tagRE = `</?[a-z][a-z1-6]*( +[a-z]+ *= *"[a-z A-Z0-9-]*")*( /)?>` 296 297 // regex to match opening/closing HTML tags, used in function `clean`; the 298 // first letter explicitly excludes a, to avoid matching/replacing anchor tags 299 var tagMatcher = regexp.MustCompile(tagRE) 300 301 // regex to match ampersand escapes, used in function `clean` 302 var ampersandMatcher = regexp.MustCompile(`&[a-zA-Z]+;`) 303 304 var ampersandEscapes = map[string]string{ 305 ` `: ` `, 306 `&`: `&`, 307 `<`: `<`, 308 `>`: `>`, 309 } 310 311 // clean improves the content of descriptions, by removing typical markup 312 // junk often found in RSS feeds 313 func clean(s string) string { 314 s = tagMatcher.ReplaceAllStringFunc(s, func(s string) string { 315 if strings.HasPrefix(s, `<a `) { 316 return s 317 } 318 319 switch s { 320 case `</a>`: 321 return `</a>` 322 case `<br/>`, `<br />`: 323 return "\n" 324 default: 325 return `` 326 } 327 }) 328 329 s = ampersandMatcher.ReplaceAllStringFunc(s, func(s string) string { 330 sub, ok := ampersandEscapes[s] 331 if ok { 332 return sub 333 } 334 return s 335 }) 336 337 return s 338 } 339 340 // makeDataURI encodes the bytes given into a MIME-typed base64-encoded URI 341 func makeDataURI(b []byte, mime string) (string, error) { 342 var buf strings.Builder 343 base64len := int(math.Ceil(4 * float64(len(b)) / 3)) 344 buf.Grow(len(`data:`) + len(mime) + len(`;base64,`) + base64len) 345 fmt.Fprintf(&buf, `data:%s;base64,`, mime) 346 347 enc := base64.NewEncoder(base64.StdEncoding, &buf) 348 defer enc.Close() 349 350 _, err := enc.Write(b) 351 if err != nil { 352 return ``, err 353 } 354 return buf.String(), nil 355 } 356 357 // notEmptyOr simplifies control flow around this app 358 func notEmptyOr(s, fallback string) string { 359 if len(s) > 0 { 360 return s 361 } 362 return fallback 363 } 364 365 // parseFeed takes raw RSS-string bytes and makes a feed object out of them 366 func parseFeed(b []byte) (feed, error) { 367 atom, err := parseAtom(b) 368 if err != io.EOF && err != nil { 369 return feed{}, err 370 } 371 372 if len(atom.Channels) == 0 { 373 return feed{}, errors.New(`feed has no channels`) 374 } 375 if len(atom.Channels) > 1 { 376 const msg = `multiple channels in a single feed aren't supported` 377 return feed{}, errors.New(msg) 378 } 379 380 var feed feed 381 ch := atom.Channels[0] 382 feed.Title = ch.Title 383 feed.Link = strings.Replace(ch.Link, `http://`, `https://`, 1) 384 feed.ImageLink = ch.Image.URL 385 feed.Description = clean(ch.Description) 386 387 for _, v := range ch.Items { 388 if len(v.Enclosures) == 0 { 389 continue 390 } 391 feed.Items = append(feed.Items, adaptItem(v)) 392 } 393 return feed, nil 394 } 395 396 // feed is a template-friendly representation of a parsed podcast feed 397 type feed struct { 398 Title string 399 Link string 400 ImageLink string 401 Description string 402 403 Items []item 404 } 405 406 // item is a template-friendly representation of a podcast episode 407 type item struct { 408 Title string 409 Link string 410 Tooltip string 411 Description string 412 } 413 414 // adaptItem makes a podcast episodes's info more template-friendly 415 func adaptItem(v atomItem) item { 416 tooltip := `` 417 duration := v.Duration 418 // if duration is in seconds, turn it into the hh:mm:ss format 419 if !strings.Contains(duration, `:`) { 420 n, err := strconv.Atoi(duration) 421 if err == nil && n > 0 { 422 duration = (time.Duration(n) * time.Second).String() 423 } 424 } 425 426 if duration != `` && v.PublicationDate != `` { 427 const fs = `published: %s | duration: %s` 428 tooltip = fmt.Sprintf(fs, v.PublicationDate, duration) 429 } 430 if duration == `` && v.PublicationDate != `` { 431 tooltip = fmt.Sprintf(`published: %s`, v.PublicationDate) 432 } 433 if duration != `` && v.PublicationDate == `` { 434 tooltip = fmt.Sprintf(`duration: %s`, v.PublicationDate) 435 } 436 437 enc := v.Enclosures[0] 438 return item{ 439 Title: v.Title, 440 Link: notEmptyOr(enc.URL, enc.AttrURL), 441 Tooltip: tooltip, 442 Description: clean(v.Description), 443 } 444 } 445 446 // atomFeed is an atom-format XML/RSS document: all its useful info is in the 447 // Channels field, which is an array with usually only 1 item 448 type atomFeed struct { 449 Atom string `xml:"atom"` 450 CC string `xml:"cc"` 451 Channels []atomChannel `xml:"channel"` 452 Content string `xml:"content"` 453 Media string `xml:"media"` 454 Version int `xml:"version"` 455 } 456 457 // atomChannel has all the channel tags in an atom-format document: its most 458 // useful info is in its Items array field 459 type atomChannel struct { 460 Author string `xml:"author"` 461 Description string `xml:"description"` 462 Docs string `xml:"docs"` 463 Explicit string `xml:"explicit"` 464 Image atomImage `xml:"image"` 465 Items []atomItem `xml:"item"` 466 Language string `xml:"language"` 467 Link string `xml:"link"` 468 PublicationDate string `xml:"pubDate"` 469 Summary string `xml:"summary"` 470 Title string `xml:"title"` 471 Subtitle string `xml:"subtitle"` 472 473 // Copyright string `xml:"copyright"` 474 // Generator string `xml:"generator"` 475 // Categories []string 476 // Image []string 477 // Owner []string 478 // ManagingEditor string 479 // LastBuildDate string 480 // Type string 481 } 482 483 // atomImage is a channel's thumbnail image/logo 484 type atomImage struct { 485 Title string `xml:"title"` 486 URL string `xml:"url"` 487 } 488 489 // atomItem is a link to a podcast episode or to an article 490 type atomItem struct { 491 Author string `xml:"author"` 492 Description string `xml:"description"` 493 Duration string `xml:"duration"` // media-duration as hh:mm:ss 494 495 Enclosures []atomEnclosure `xml:"enclosure"` 496 497 Episode int `xml:"episode"` 498 Explicit string `xml:"explicit"` 499 PublicationDate string `xml:"pubDate"` 500 Summary string `xml:"summary"` 501 Title string `xml:"title"` 502 503 // Keywords []string // not sure these array items are strings 504 } 505 506 // atomEnclosure is an item's link, along with some useful metadata 507 type atomEnclosure struct { 508 Length int `xml:"length"` // seems to be the media filesize 509 Type string `xml:"type"` // MIME type for the media file 510 URL string `xml:"url"` // the URL for the media file 511 512 // special iTunes attributes 513 AttrLength int `xml:"length,attr"` 514 AttrType string `xml:"type,attr"` 515 AttrURL string `xml:"url,attr"` 516 AttrDuration string `xml:"duration,attr"` 517 } 518 519 // parseAtom decodes podcast/feed info from the bytes given 520 func parseAtom(b []byte) (atomFeed, error) { 521 var wrap atomFeed 522 if !bytes.Contains(b, []byte(`itunes:`)) { 523 err := xml.Unmarshal(b, &wrap) 524 return wrap, err 525 } 526 527 b = bytes.ReplaceAll(b, []byte(`<itunes:`), []byte{'<'}) 528 b = bytes.ReplaceAll(b, []byte(`</itunes:`), []byte{'<', '/'}) 529 err := xml.Unmarshal(b, &wrap) 530 return wrap, err 531 } 532 533 // Result is the payload/error combo resulting from trying to fetch a feed. 534 type Result struct { 535 Index int 536 URI string 537 538 Feed feed 539 Problem error 540 } 541 542 // fetch tries to fetch all podcast feeds concurrently, to save time 543 func fetch(cfg config) []Result { 544 var wg sync.WaitGroup 545 wg.Add(len(cfg.Feeds)) 546 547 // start rate-limiter up to the # of CPUs 548 tickets := make(chan int, runtime.NumCPU()) 549 go func() { 550 for i := range cfg.Feeds { 551 tickets <- i 552 } 553 554 // wait until fetcher loop below has finished dispatching all tasks 555 wg.Wait() 556 close(tickets) // quit the fetcher loop 557 }() 558 559 // setup parameters and final results array 560 res := make([]Result, len(cfg.Feeds)) 561 for i, uri := range cfg.Feeds { 562 res[i] = Result{Index: i, URI: uri, Feed: feed{}, Problem: nil} 563 } 564 565 // concurrently fetch feeds 566 for i := range tickets { 567 go fetchItem(&res[i], &wg, cfg) 568 } 569 return res 570 } 571 572 // fetchItem is concurrently called/dispatched to try to fetch and decode a 573 // single podcast feed: any error along the way is remembered as part of the 574 // result, so the user can later be told about it 575 func fetchItem(r *Result, wg *sync.WaitGroup, cfg config) { 576 defer wg.Done() 577 578 // read RSS feed 579 b, err := slurp(r.URI) 580 if err != nil { 581 r.Problem = err 582 return 583 } 584 585 // extract most important RSS info 586 f, err := parseFeed(b) 587 if err != nil { 588 r.Problem = err 589 return 590 } 591 // r.Feed = newFeed(f) 592 r.Feed = f 593 594 if !cfg.Thumbnails { 595 // to hide thumbnails, use a no-data URI 596 r.Feed.ImageLink = `data,` 597 return 598 } 599 600 if !cfg.Inline { 601 // if asked to, keep images as externally-linked resources 602 return 603 } 604 605 // read image thumbnail 606 b, err = slurp(f.ImageLink) 607 if err != nil { 608 r.Problem = err 609 return 610 } 611 612 mime := `image/jpeg` 613 if strings.Contains(f.ImageLink, `.png`) { 614 mime = `image/png` 615 } 616 617 // data-URI-encode thumbnail, so it's part of the resulting webpage 618 s, err := makeDataURI(b, mime) 619 if err != nil { 620 r.Problem = err 621 return 622 } 623 r.Feed.ImageLink = s 624 } 625 626 func slurp(uri string) ([]byte, error) { 627 resp, err := http.Get(uri) 628 if err != nil { 629 return nil, err 630 } 631 defer resp.Body.Close() 632 return io.ReadAll(resp.Body) 633 }