File: frep.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from math import ceil, log10
  27 from re import compile as compile_re, IGNORECASE, Pattern
  28 from sys import argv, exit, stderr, stdin, stdout
  29 
  30 
  31 info = '''
  32 frep [options...] [regex] [filepaths/URIs...]
  33 
  34 
  35 Flat Regular-Expression Print(er) shows all regex-matches on binary data,
  36 without being limited by end-of-line-style byte-sequences.
  37 
  38 Matches are always shown ANSI-styled, starting with the data-source name
  39 (a filepath, or URI), the 0-based byte-offset range of the match, a few
  40 preceding bytes, the bytes matched (styled), and a few following bytes,
  41 the surrounding unstyled bytes shown for context.
  42 
  43 All (optional) leading options start with either single or double-dash.
  44 Some of the options are, shown in their single-dash form:
  45 
  46     -h       show this help message
  47     -help    show this help message
  48 
  49     -i       case-insensitive matching of ASCII letters
  50 '''
  51 
  52 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  53     print(info.strip(), file=stderr)
  54     exit(0)
  55 
  56 
  57 def seems_url(s: str) -> bool:
  58     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
  59     return any(s.startswith(p) for p in protocols)
  60 
  61 
  62 def handle_input(w, name: str, data: bytes, pat: Pattern) -> None:
  63     pad = 20
  64     o = w.write
  65     l = len(data)
  66     n = int(ceil(log10(l))) if l > 0 else 0
  67 
  68     for m in pat.finditer(data):
  69         i = m.start()
  70         j = m.end()
  71 
  72         s = f'\x1b[35m{name} \x1b[34m[{i:{0}>{n}}:{j:{0}>{n}}]\x1b[0m '
  73         o(s.encode('utf-8'))
  74         o(data[(i - min(i, pad)):i])
  75         o(b'\x1b[48;5;29m\x1b[97m')
  76         o(data[i:j])
  77         o(b'\x1b[0m')
  78         o(data[j:min(l, j + pad)])
  79         o(b'\x1b[0m\n')
  80 
  81 try:
  82     mode = 0 # NOFLAG
  83     start_inputs = 2
  84     if len(argv) > 1 and argv[1] in ('-i', '--i'):
  85         mode = IGNORECASE
  86         start_inputs = 3
  87 
  88     if len(argv) < start_inputs:
  89         print(info.strip(), file=stderr)
  90         raise ValueError('no regex given')
  91 
  92     expr = compile_re(argv[start_inputs - 1].encode('utf-8'), flags=mode)
  93     inputs = argv[start_inputs:]
  94 
  95     if inputs.count('-') > 1:
  96         msg = 'can\'t read from `-` (standard input) more than once'
  97         raise ValueError(msg)
  98 
  99     if any(seems_url(e) for e in inputs):
 100         from urllib.request import urlopen
 101 
 102     r = stdin.buffer
 103     w = stdout.buffer
 104     f = handle_input
 105 
 106     for path in inputs:
 107         if path == '-':
 108             f(w, path, r.read(), expr)
 109             continue
 110 
 111         if seems_url(path):
 112             with urlopen(path) as inp:
 113                 f(w, path, inp.read(), expr)
 114             continue
 115 
 116         with open(path, mode='rb') as inp:
 117             f(w, path, inp.read(), expr)
 118 
 119     if len(inputs) == 0:
 120         f(w, '-', r.read(), expr)
 121 except BrokenPipeError:
 122     # quit quietly, instead of showing a confusing error message
 123     stderr.close()
 124 except KeyboardInterrupt:
 125     exit(2)
 126 except Exception as e:
 127     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 128     exit(1)