#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from datetime import datetime
from html import escape
from multiprocessing import Pool
from sys import argv, exit, stderr, stdin
from typing import Dict, List
from urllib.parse import urlparse, urlunparse
from urllib.request import urlopen
from xml.dom.minidom import parse


# info is the help message, shown (minus its surrounding empty lines) when
# the script is run with a help option as its only argument
info = '''
podfeed [options...] [filepaths/URIs...]


PODcast FEED fetches all episodes from the feeds given as URIs, either as
arguments, or as lines in the plain-text files given.

The result is self-contained HTML which links to all episodes, and adds
many little extras, such as tooltips showing date of publication and play
duration.

Podcast thumbnails aren't included as inline data-URIs, to avoid making
the output size considerably bigger; they could easily source external
URIs, but doing that would make the output no longer fully self-contained.

All (optional) leading options start with either single or double-dash,
and most of them change the style/color used. Some of the options are,
shown in their single-dash form:

    -h          show this help message
    -help       show this help message

    -title      use the next argument as the title in the HTML output
'''

# a help option given as the one and only cmd-line argument means show the
# help message and quit successfully
if len(argv) == 2:
    if argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)


def fail(msg, code: int = 1) -> None:
    'Print the message given to stderr in red, then quit with the code given.'

    # ANSI escape sequences: red foreground on, then reset all styles
    red, reset = '\x1b[31m', '\x1b[0m'
    print(f'{red}{msg}{reset}', file=stderr)
    exit(code)


# handle leading cmd-line options: the only one handled is `title`, which
# takes the argument right after it as its value; the first dash-leading
# argument which isn't a title option ends the options, and is left among
# the filepaths/URIs to handle
title = ''
start_args = 1
while start_args < len(argv) and argv[start_args].startswith('-'):
    # the option name must match exactly: a substring check against 'title'
    # would also accept things like `-t`, `-it`, and even a lone `-`, which
    # is the name reserved for standard input
    option = argv[start_args].lstrip('-').lower()
    if option != 'title':
        break
    if start_args + 1 >= len(argv):
        fail('missing actual title in cmd-line arguments', 1)
    title = argv[start_args + 1]
    start_args += 2
args = argv[start_args:]

# use a default web-page title if one wasn't given
if title == '':
    now = datetime.now()
    ymd = f'{now.year}-{now.month:02}-{now.day:02}'
    hms = f'{now.hour}:{now.minute:02}:{now.second:02}'
    title = f'Latest Podcast Episodes as of {ymd} {hms}'


def parse_feed(uri: str) -> Dict:
    '''
    Load the XML feed at the URI given, and turn it into dictionaries.

    The result has a single key `rss`, a list with an item for each `rss`
    element found: each of those is a dictionary whose key `channels` has
    the list of channels parsed by func parse_channel.
    '''

    # NOTE(review): the feed data is parsed as-is using xml.dom.minidom,
    # so only trusted feeds should be loaded
    with urlopen(uri) as inp:
        doc = parse(inp)
        parsed = [
            {
                'channels': [
                    parse_channel(chan)
                    for chan in rss.getElementsByTagName('channel')
                ],
            }
            for rss in doc.getElementsByTagName('rss')
        ]
    return {'rss': parsed}


def parse_channel(chan) -> Dict:
    '''
    Turn a `channel` XML element into a dictionary for func parse_feed.

    The result has string values for keys `title`, `link`, and `description`
    (empty when not found), and key `episodes` with a list of dictionaries,
    one for each `item` element, as parsed by func parse_episode.
    '''

    # no channel thumbnail for now
    return {
        'title': get_str(chan, 'title'),
        'link': get_str(chan, 'link'),
        'description': get_str(chan, 'description'),
        'episodes': [
            parse_episode(item) for item in chan.getElementsByTagName('item')
        ],
    }


def parse_episode(episode) -> Dict:
    '''
    Turn an `item` XML element into a dictionary for func parse_channel.

    All values in the result are strings, which are empty when the elements
    they come from aren't found.
    '''

    fields = {
        'title': get_str(episode, 'title'),
        'link': get_str(episode, 'link'),
        'description': get_str(episode, 'description'),
        'pub_date': get_str(episode, 'pubDate'),
        'duration': get_str(episode, 'itunes:duration'),
    }

    # enclosures replace the link with their own URL: when there are several
    # of them, the last one wins
    for enclosure in episode.getElementsByTagName('enclosure'):
        fields['link'] = enclosure.getAttribute('url')

    return fields


def render_feed(feed) -> None:
    '''
    Print a single parsed RSS feed as an HTML article on standard output.

    The feed is a dictionary like the ones func parse_feed returns: each of
    its channels gets a linked heading, followed by all of its episodes.
    '''

    pad = ' ' * 12
    print('        <article>')

    # flatten the rss/channel nesting, keeping the original order
    channels = (chan for rss in feed['rss'] for chan in rss['channels'])

    for chan in channels:
        link = urlunparse(urlparse(chan['link']))
        name = escape(chan['title'])
        tooltip = escape(chan['description'])
        anchor = make_anchor(link, name)
        # no channel thumbnail for now
        print(f'{pad}<h1><summary title="{tooltip}">{anchor}</summary></h1>')

        for ep in chan['episodes']:
            render_episode(ep)

    print('        </article>')


def render_episode(episode) -> None:
    '''
    Print a single episode as an HTML section, to help out func render_feed.

    The episode is a dictionary like the ones func parse_episode returns:
    its linked title is always visible, with a tooltip showing publication
    date and duration, while its description is collapsed by default.
    '''

    name = escape(episode['title'])
    link = urlunparse(urlparse(episode['link']))
    descr = escape(episode['description'])
    tooltip = make_tooltip(
        escape(episode['pub_date']), escape(episode['duration']),
    )
    anchor = make_anchor(link, name)

    lines = (
        '            <section>',
        '                <details>',
        f'                    <summary title="{tooltip}">{anchor}</summary>',
        f'                    <p>{descr}</p>',
        '                </details>',
        '            </section>',
    )
    for line in lines:
        print(line)


def make_anchor(href: str, title: str) -> str:
    '''
    Standardize how hyperlinks are handled in this script.

    The href is HTML-escaped here, since it ends up inside a double-quoted
    attribute: without escaping, a double-quote in it would end the attribute
    early, letting feed data add markup of its own. The title is used as
    given, so callers must escape it themselves before calling this func.
    '''

    # NOTE(review): escaping doesn't restrict URI schemes, so links such as
    # `javascript:` ones are still emitted as given
    safe_href = escape(href)
    return f'<a target="_blank" rel="noreferrer" href="{safe_href}">{title}</a>'


def make_tooltip(pub_date: str, duration: str) -> str:
    '''
    Make the tooltip text showing when an episode was published and how long
    it plays for.

    A duration which is a non-negative integer is taken as a number of
    seconds, and shown as `hh:mm:ss`, or just as `mm:ss` when under an hour;
    any other duration, such as one already formatted, is shown as given.
    '''

    try:
        # because datetime's supposedly-idiomatic solutions are so ugly
        total = int(duration)
        if total < 0:
            raise ValueError('negative durations make no sense')
        minutes, seconds = divmod(total, 60)
        hours, minutes = divmod(minutes, 60)
        if hours > 0:
            hms = f'{hours:02}:{minutes:02}:{seconds:02}'
        else:
            hms = f'{minutes:02}:{seconds:02}'
    except (TypeError, ValueError):
        hms = duration
    return f'published: {pub_date} | duration: {hms}'


def get_str(src, tag: str) -> str:
    '''
    Simplify the control-flow of various feed-parsing funcs.

    The result is the text directly inside the first element with the tag
    given, found anywhere under the XML node given, with its leading and
    trailing whitespace removed: it's empty when there's no such element,
    when the element has no text, or when anything goes wrong.
    '''

    try:
        found = src.getElementsByTagName(tag)
        if len(found) == 0:
            return ''

        # join all text/CDATA pieces, instead of stopping at the first one:
        # a CDATA section on its own line comes right after a text node with
        # just whitespace in it, which would otherwise hide the actual text
        parts = [
            node.data for node in found[0].childNodes
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
        ]
        return ''.join(parts).strip()
    except Exception:
        # a deliberate best-effort: missing/odd data just means no text
        return ''


def get_uris(src) -> List[str]:
    '''
    Get all URIs from the lines given, to help out func load_feed_uris.

    Lines are stripped of surrounding whitespace: the ones which end up
    empty, or which start with a `#`, are ignored.
    '''

    stripped = (line.strip() for line in src)
    return [s for s in stripped if s != '' and not s.startswith('#')]


def load_feed_uris(args: List[str]) -> List[str]:
    '''
    Turn a mix of URIs and filepaths into a list of URIs to load.

    Arguments starting with `http://` or `https://` are used as given, a
    dash means read URIs from the lines on standard input, and anything
    else is the path of a UTF-8 text file to read URIs from, one per line.
    Standard input is also read when no arguments are given at all.

    A ValueError is raised when the dash is given more than once, and files
    which can't be opened raise the usual OSError exceptions.
    '''

    if args.count('-') > 1:
        msg = 'reading from `-` (standard input) more than once not allowed'
        raise ValueError(msg)

    if len(args) == 0:
        return get_uris(stdin)

    uris = []

    for path in args:
        if path.startswith(('https://', 'http://')):
            uris.append(path)
        elif path == '-':
            # the dash is never a filepath: falling through to open a file
            # with that name would fail right after reading standard input
            uris.extend(get_uris(stdin))
        else:
            with open(path, encoding='utf-8') as inp:
                uris.extend(get_uris(inp))

    return uris


# style is the `inner` CSS used inside the style tag: it's printed as is,
# except for the empty lines around it, and its indentation matches the one
# of the style tag in the HTML output; the rules for img elements aren't
# used for now, since no thumbnails are emitted
style = '''
        body {
            font-size: 0.9rem;
            margin: 0 0 2rem 0;
            font-family: system-ui, -apple-system, sans-serif;
        }

        main {
            margin: auto;
            display: flex;
            width: fit-content;
        }

        h1 {
            top: 0;
            position: sticky;
            font-size: 0.9rem;
            text-align: center;
            background-color: white;
        }

        img {
            margin: auto;
            margin-bottom: 1rem;
            display: block;
            max-width: 15ch;
        }

        section {
            width: 48ch;
            padding: 0.3rem;
            margin: 0 0.1rem;
        }

        section:nth-child(2n+1) {
            background-color: #eee;
        }

        a {
            color: steelblue;
            text-decoration: none;
        }

        details p {
            line-height: 1.3rem;
        }
'''.strip('\n')


# the guard keeps pool workers from running the script all over again:
# process start-methods which re-import the main module (spawn, forkserver)
# would otherwise try to start process-pools of their own from here
if __name__ == '__main__':
    try:
        uris = load_feed_uris(args)

        # load/parse all feeds before printing anything, so a failing feed
        # doesn't leave a truncated web-page on standard output
        feeds = []
        if len(uris) > 0:
            # significantly speed-up script by loading/parsing feeds
            # concurrently; process-pools can't have 0 processes, so
            # having no feeds at all means skipping this step
            with Pool(processes=min(4, len(uris))) as pool:
                feeds = pool.map(parse_feed, uris)

        print('<!DOCTYPE html>')
        print('<html lang="en">')
        print('<head>')
        print('    <meta charset="UTF-8">')
        # the empty data-URI prevents browsers from requesting a favicon
        print('    <link rel="icon" href="data:,">')
        cattr = 'content="width=device-width, initial-scale=1.0"'
        print(f'    <meta name="viewport" {cattr}>')
        if title != '':
            print(f'    <title>{escape(title)}</title>')
        print('    <style>')
        print(style)
        print('    </style>')
        print('</head>')
        print('<body>')
        print('    <main>')

        for feed in feeds:
            render_feed(feed)

        print('    </main>')
        print('</body>')
        print('</html>')
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        # top-level boundary: show any other error in red and quit
        fail(e, 1)