#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from math import ceil, log10
from re import compile as compile_re, IGNORECASE, Pattern
from sys import argv, exit, stderr, stdin, stdout


# help message for the app: it's shown as is (after trimming it) either on
# standard output when asked for, or on standard error when no regex is given
info = '''
frep [options...] [regex] [filepaths/URIs...]


Flat Regular-Expression Print(er) shows all regex-matches on binary data,
without being limited by end-of-line-style byte-sequences.

Matches are always shown ANSI-styled, starting with the data-source name
(a filepath, or URI), the 0-based byte-offset range of the match, a few
preceding bytes, the bytes matched (styled), and a few following bytes,
the surrounding unstyled bytes shown for context.

All (optional) leading options start with either single or double-dash.
Some of the options are, shown in their single-dash form:

    -h       show this help message
    -help    show this help message

    -i       case-insensitive matching of ASCII letters
'''

# the help message is only shown when a help option is the sole argument
if argv[1:] in (['-h'], ['--h'], ['-help'], ['--help']):
    print(info.strip())
    exit(0)


def seems_url(s: str) -> bool:
    '''
    Check whether the string given starts with any of the URI prefixes
    recognized: `https://`, `http://`, `file://`, `ftp://`, or `data:`.
    The check is case-sensitive, and only looks at the start of the string.
    '''

    # str.startswith accepts a tuple of prefixes, checking them all at once
    return s.startswith(('https://', 'http://', 'file://', 'ftp://', 'data:'))


def handle_input(w, name: str, data: bytes, pat: Pattern) -> None:
    '''
    Write a line for each match of the bytes-regex given found in the data.

    Each line written has, in order: the ANSI-styled source name, the styled
    zero-padded `[start:end]` byte-offsets of the match, up to 20 bytes of
    leading context, the matched bytes (styled), and up to 20 bytes of
    trailing context. Nothing is written when there are no matches.

    Parameters:
        w: the binary output; only its `write` method is used
        name: the data-source name to show at the start of each line
        data: all the bytes to search
        pat: a regex compiled from a bytes pattern
    '''

    pad = 20
    write = w.write
    size = len(data)

    # offsets range from 0 up to the data size (inclusive, for end-offsets),
    # so pad them all to the digit-count of the size itself: a width of
    # ceil(log10(size)) comes up 1 digit short when size is a power of 10
    width = len(str(size))

    for m in pat.finditer(data):
        i = m.start()
        j = m.end()

        s = f'\x1b[35m{name} \x1b[34m[{i:0>{width}}:{j:0>{width}}]\x1b[0m '
        write(s.encode('utf-8'))
        # leading context: slicing can't start before the start of the data
        write(data[max(0, i - pad):i])
        write(b'\x1b[48;5;29m\x1b[97m')
        write(data[i:j])
        write(b'\x1b[0m')
        # trailing context: slicing already stops at the end of the data
        write(data[j:j + pad])
        write(b'\x1b[0m\n')

# main part of the script: handle the leading options, compile the regex,
# then show all matches from each input, in the order given
try:
    # an optional leading `-i`/`--i` enables case-insensitive matching, and
    # pushes both the regex and the inputs one argument further along
    mode = 0 # NOFLAG
    start_inputs = 2
    if len(argv) > 1 and argv[1] in ('-i', '--i'):
        mode = IGNORECASE
        start_inputs = 3

    if len(argv) < start_inputs:
        print(info.strip(), file=stderr)
        raise ValueError('no regex given')

    # the regex is compiled as a bytes-pattern, since all inputs are bytes
    expr = compile_re(argv[start_inputs - 1].encode('utf-8'), flags=mode)
    inputs = argv[start_inputs:]

    # standard input can only be read in full once
    if inputs.count('-') > 1:
        msg = 'can\'t read from `-` (standard input) more than once'
        raise ValueError(msg)

    # only import the URI-loading func when it's actually needed
    if any(seems_url(e) for e in inputs):
        from urllib.request import urlopen

    r = stdin.buffer
    w = stdout.buffer
    f = handle_input

    for path in inputs:
        if path == '-':
            f(w, path, r.read(), expr)
            continue

        if seems_url(path):
            with urlopen(path) as inp:
                f(w, path, inp.read(), expr)
            continue

        with open(path, mode='rb') as inp:
            f(w, path, inp.read(), expr)

    # use standard input when no named inputs are given
    if len(inputs) == 0:
        f(w, '-', r.read(), expr)

    # flush while still inside the `try`, so a broken pipe affecting output
    # which is still buffered is caught by the handler below, instead of
    # only failing noisily when the interpreter flushes its output at exit
    w.flush()
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
except KeyboardInterrupt:
    exit(2)
except Exception as e:
    # show the error message styled in red, and fail
    print(f'\x1b[31m{e}\x1b[0m', file=stderr)
    exit(1)