File: cext.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from re import compile, IGNORECASE
  27 from sys import argv, exit, stderr, stdin, stdout
  28 from typing import Dict
  29 
  30 
  31 info = '''
  32 cext [options...] [filepaths/URIs...]
  33 
  34 Color/style file EXTensions makes all file-extensions stand out, using
  35 different ANSI-styles as it finds new ones. Since its list of styles is
  36 limited, these will start being reused, given enough unique extensions.
  37 
  38 The help option is `-h`, `--h`, `-help`, or `--help`.
  39 '''
  40 
  41 # a leading help-option arg means show the help message and quit
  42 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  43     print(info.strip(), file=stderr)
  44     exit(0)
  45 
  46 
  47 def style_line(w, s: str, styles: Dict[str, str]) -> None:
  48     'Does what it says, replacing content of the StringIO given to it.'
  49 
  50     # ignore trailing carriage-returns and line-feeds in lines
  51     s = s.rstrip('\r\n').rstrip('\n')
  52 
  53     # j keeps track of end of detected file-extensions, and is used
  54     # outside the regex-match loop to detect trailing parts in lines
  55     j = 0
  56 
  57     # matches is to keep track of whether any matches occurred
  58     matches = 0
  59 
  60     # replace all regex-matches on the line by surrounding each
  61     # matched substring with ANSI styles/resets
  62     for m in ext_re.finditer(s):
  63         matches += 1
  64         # remember previous index-end, used to emit the part before
  65         # the current match
  66         start = j
  67 
  68         i = m.start()
  69         j = m.end()
  70 
  71         # write part before match
  72         write_slice(w, s, start, i)
  73 
  74         ext = s[i:j]
  75 
  76         # pick style for the current match
  77         if ext in styles:
  78             st = styles[ext]
  79         else:
  80             # first occurrence of this extension
  81             st = palette[len(styles) % len(palette)]
  82             styles[ext] = st
  83 
  84         # style the extension
  85         w.write(st)
  86         w.write(ext)
  87         w.write('\x1b[0m')
  88         ext = ''
  89 
  90     if matches == 0:
  91         # show lines with no matches
  92         w.write(s)
  93     elif j > 0:
  94         # don't forget trailing part of line
  95         w.write(s[j:])
  96 
  97     # don't forget to end the line
  98     w.write('\n')
  99     w.flush()
 100 
 101 
 102 def write_slice(w, s: str, start: int, end: int) -> None:
 103     # 'Emit slice-like substrings without allocating slices.'
 104     # for i in range(start, end):
 105     #     w.write(s[i])
 106     w.write(s[start:end])
 107 
 108 
 109 def seems_url(s: str) -> bool:
 110     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 111     return any(s.startswith(p) for p in protocols)
 112 
 113 
 114 # ext_re is the regex used to find all file extensions
 115 ext_re = compile('\\.([0-9][a-z_-]+|[a-z_-][a-z_0-9-]+)', flags=IGNORECASE)
 116 
 117 # palette is the whole list of ANSI-styles used to make extensions stand out
 118 palette = [
 119     '\x1b[38;5;26m', # blue
 120     '\x1b[38;5;166m', # orange
 121     '\x1b[38;5;99m', # purple
 122     '\x1b[38;5;38m', # cyan
 123     '\x1b[38;5;213m', # pink
 124     '\x1b[38;5;29m', # green
 125     '\x1b[31m', # red
 126     '\x1b[38;5;248m', # gray
 127 
 128     # '\x1b[1m', # bold
 129     # '\x1b[4m', # underline
 130     # '\x1b[7m', # inverse
 131 ]
 132 
 133 try:
 134     args = argv[1:]
 135     styles = {}
 136 
 137     if args.count('-') > 1:
 138         msg = 'reading from `-` (standard input) more than once not allowed'
 139         raise ValueError(msg)
 140 
 141     if any(seems_url(e) for e in args):
 142         from urllib.request import urlopen
 143 
 144     # handle all named inputs given
 145     for path in args:
 146         if path == '-':
 147             for line in stdin:
 148                 style_line(stdout, line, styles)
 149             continue
 150 
 151         if seems_url(path):
 152             with urlopen(path) as inp:
 153                 for line in inp:
 154                     style_line(stdout, str(line, encoding='utf-8'), styles)
 155             continue
 156 
 157         with open(path, encoding='utf-8') as inp:
 158             for line in inp:
 159                 style_line(stdout, line, styles)
 160 
 161     if len(args) == 0:
 162         for line in stdin:
 163             style_line(stdout, line, styles)
 164 except BrokenPipeError:
 165     # quit quietly, instead of showing a confusing error message
 166     stderr.close()
 167 except KeyboardInterrupt:
 168     exit(2)
 169 except Exception as e:
 170     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 171     exit(1)