File: cext.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # cext [options...] [filepaths/URIs...]
  27 #
  28 # Color/style file EXTensions makes all file-extensions stand out, using
  29 # different ANSI-styles as it finds new ones. Since its list of styles is
  30 # limited, these will start being reused, given enough unique extensions.
  31 #
  32 # The help option is `-h`, `--h`, `-help`, or `--help`.
  33 
  34 
  35 from re import compile, IGNORECASE
  36 from sys import argv, exit, stderr, stdin, stdout
  37 from typing import Dict
  38 from urllib.request import urlopen
  39 
  40 
  41 # info is the message shown when the script isn't given any argument, or
  42 # when the leading argument is one of the standard cmd-line help options
  43 info = '''
  44 cext [options...] [filepaths/URIs...]
  45 
  46 Color/style file EXTensions makes all file-extensions stand out, using
  47 different ANSI-styles as it finds new ones. Since its list of styles is
  48 limited, these will start being reused, given enough unique extensions.
  49 
  50 The help option is `-h`, `--h`, `-help`, or `--help`.
  51 '''.strip()
  52 
  53 # a leading help-option arg means show the help message and quit
  54 if len(argv) == 2 and argv[1].lower() in ('-h', '--h', '-help', '--help'):
  55     print(info, file=stderr)
  56     exit(0)
  57 
  58 
  59 def style_line(w, s: str, styles: Dict[str, str]) -> None:
  60     '''Does what it says, replacing content of the StringIO given to it.'''
  61 
  62     # ignore trailing carriage-returns and line-feeds in lines
  63     s = s.rstrip('\r\n').rstrip('\n')
  64 
  65     # j keeps track of end of detected file-extensions, and is used
  66     # outside the regex-match loop to detect trailing parts in lines
  67     j = 0
  68 
  69     # matches is to keep track of whether any matches occurred
  70     matches = 0
  71 
  72     # replace all regex-matches on the line by surrounding each
  73     # matched substring with ANSI styles/resets
  74     for m in ext_re.finditer(s):
  75         matches += 1
  76         # remember previous index-end, used to emit the part before
  77         # the current match
  78         start = j
  79 
  80         i = m.start()
  81         j = m.end()
  82 
  83         # write part before match
  84         write_slice(w, s, start, i)
  85 
  86         ext = s[i:j]
  87 
  88         # pick style for the current match
  89         if ext in styles:
  90             st = styles[ext]
  91         else:
  92             # first occurrence of this extension
  93             st = palette[len(styles) % len(palette)]
  94             styles[ext] = st
  95 
  96         # style the extension
  97         w.write(st)
  98         w.write(ext)
  99         w.write('\x1b[0m')
 100         ext = ''
 101 
 102     if matches == 0:
 103         # show lines with no matches
 104         w.write(s)
 105     elif j > 0:
 106         # don't forget trailing part of line
 107         w.write(s[j:])
 108 
 109     # don't forget to end the line
 110     w.write('\n')
 111     w.flush()
 112 
 113 
 114 def write_slice(w, s: str, start: int, end: int) -> None:
 115     # '''Emit slice-like substrings without allocating slices.'''
 116     # for i in range(start, end):
 117     #     w.write(s[i])
 118     w.write(s[start:end])
 119 
 120 
 121 def seems_url(s: str) -> bool:
 122     for prot in ('https://', 'http://', 'file://', 'ftp://', 'data:'):
 123         if s.startswith(prot):
 124             return True
 125     return False
 126 
 127 
 128 # ext_re is the regex used to find all file extensions
 129 ext_re = compile('\.([0-9][a-z]+|[a-z][a-z0-9]+)', flags=IGNORECASE)
 130 
 131 # palette is the whole list of ANSI-styles used to make extensions stand out
 132 palette = [
 133     '\x1b[38;5;26m', # blue
 134     '\x1b[38;5;166m', # orange
 135     '\x1b[38;5;99m', # purple
 136     '\x1b[38;5;38m', # cyan
 137     '\x1b[38;5;213m', # pink
 138     '\x1b[38;5;29m', # green
 139     '\x1b[31m', # red
 140     '\x1b[38;5;249m', # gray
 141 
 142     # '\x1b[1m', # bold
 143     # '\x1b[4m', # underline
 144     # '\x1b[7m', # inverse
 145 ]
 146 
 147 try:
 148     args = argv[1:]
 149     styles = {}
 150 
 151     if args.count('-') > 1:
 152         msg = 'reading from `-` (standard input) more than once not allowed'
 153         raise ValueError(msg)
 154 
 155     stdout.reconfigure(newline='\n', encoding='utf-8')
 156 
 157     # handle all named inputs given
 158     for path in args:
 159         if path == '-':
 160             for line in stdin:
 161                 style_line(stdout, line, styles)
 162             continue
 163 
 164         if seems_url(path):
 165             with urlopen(path) as inp:
 166                 for line in inp:
 167                     style_line(stdout, str(line, encoding='utf-8'), styles)
 168             continue
 169 
 170         with open(path) as inp:
 171             for line in inp:
 172                 style_line(stdout, line, styles)
 173 
 174     # when no filenames are given, handle lines from stdin
 175     if len(args) == 0:
 176         for line in stdin:
 177             style_line(stdout, line, styles)
 178 except BrokenPipeError:
 179     # quit quietly, instead of showing a confusing error message
 180     stderr.flush()
 181     stderr.close()
 182 except KeyboardInterrupt:
 183     # quit quietly, instead of showing a confusing error message
 184     stderr.flush()
 185     stderr.close()
 186     exit(2)
 187 except Exception as e:
 188     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 189     exit(1)