File: nn.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # Note: string slicing is a major source of inefficiencies in this script,
  27 # making it viable only for small inputs; it's not clear what the stdlib
  28 # offers to loop over sub-strings without copying data, which is really
  29 # needed in this case.
  30 #
  31 # In the end the code has become much uglier by using explicit index-pairs,
  32 # which are used/updated all over to avoid copying sub-strings. Standard
  33 # output is already line-buffered by default, which makes writing to it
  34 # fairly fast.
  35 
  36 
  37 from io import TextIOWrapper
  38 from sys import argv, exit, stderr, stdin, stdout
  39 
  40 
  41 info = '''
  42 nn [option...] [filepaths/URIs...]
  43 
  44 
  45 Nice Numbers restyles all runs of 4+ digits by alternating ANSI-styles
  46 every 3-digit group, so long numbers become easier to read at a glance.
  47 
  48 All (optional) leading options start with either single or double-dash,
  49 and most of them change the style/color used. Some of the options are,
  50 shown in their single-dash form:
  51 
  52     -h          show this help message
  53     -help       show this help message
  54 
  55     -b          use a blue color
  56     -blue       use a blue color
  57     -bold       bold-style digits
  58     -g          use a green color
  59     -gray       use a gray color (default)
  60     -green      use a green color
  61     -hi         use a highlighting/inverse style
  62     -highlight  use a highlighting/inverse style
  63     -hilite     use a highlighting/inverse style
  64     -inverse    use a highlighting/inverse style
  65     -m          use a magenta color
  66     -magenta    use a magenta color
  67     -o          use an orange color
  68     -orange     use an orange color
  69     -p          use a purple color
  70     -purple     use a purple color
  71     -r          use a red color
  72     -red        use a red color
  73     -u          underline digits
  74     -underline  underline digits
  75 '''
  76 
  77 # handle standard help cmd-line options, quitting right away in that case
  78 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  79     print(info.strip(), file=stderr)
  80     exit(0)
  81 
  82 # names_aliases normalizes lookup keys for the actual style-lookup table
  83 names_aliases = {
  84     'b': 'blue',
  85     'g': 'green',
  86     'm': 'magenta',
  87     'o': 'orange',
  88     'p': 'purple',
  89     'r': 'red',
  90     'u': 'underline',
  91 
  92     'bb': 'bblue',
  93     'bg': 'bgreen',
  94     'bm': 'bmagenta',
  95     'bo': 'borange',
  96     'bp': 'bpurple',
  97     'br': 'bred',
  98     'bu': 'bunderline',
  99 
 100     'bb': 'bblue',
 101     'gb': 'bgreen',
 102     'mb': 'bmagenta',
 103     'ob': 'borange',
 104     'pb': 'bpurple',
 105     'rb': 'bred',
 106     'ub': 'bunderline',
 107 
 108     'hi': 'inverse',
 109     'inv': 'inverse',
 110     'mag': 'magenta',
 111 
 112     'flip': 'inverse',
 113     'swap': 'inverse',
 114 
 115     'reset': 'plain',
 116     'highlight': 'inverse',
 117     'hilite': 'inverse',
 118     'invert': 'inverse',
 119     'inverted': 'inverse',
 120     'swapped': 'inverse',
 121 
 122     'blueback': 'bblue',
 123     'grayback': 'bgray',
 124     'greenback': 'bgreen',
 125     'magback': 'bmagenta',
 126     'magentaback': 'bmagenta',
 127     'orangeback': 'borange',
 128     'purpleback': 'bpurple',
 129     'redback': 'bred',
 130 
 131     'bgblue': 'bblue',
 132     'bggray': 'bgray',
 133     'bggreen': 'bgreen',
 134     'bgmag': 'bmagenta',
 135     'bgmagenta': 'bmagenta',
 136     'bgorange': 'borange',
 137     'bgpurple': 'bpurple',
 138     'bgred': 'bred',
 139 
 140     'bluebg': 'bblue',
 141     'graybg': 'bgray',
 142     'greenbg': 'bgreen',
 143     'magbg': 'bmagenta',
 144     'magentabg': 'bmagenta',
 145     'orangebg': 'borange',
 146     'purplebg': 'bpurple',
 147     'redbg': 'bred',
 148 
 149     'backblue': 'bblue',
 150     'backgray': 'bgray',
 151     'backgreen': 'bgreen',
 152     'backmag': 'bmagenta',
 153     'backmagenta': 'bmagenta',
 154     'backorange': 'borange',
 155     'backpurple': 'bpurple',
 156     'backred': 'bred',
 157 }
 158 
# names2styles matches color/style names to their ANSI-style strings; keys
# are the normalized names, which are also the values in names_aliases, and
# the default style used by the script is the one for `gray`
names2styles = {
    # plain styles: each value is a single escape sequence
    'blue': '\x1b[38;5;26m',
    'bold': '\x1b[1m',
    'gray': '\x1b[38;5;248m',
    'green': '\x1b[38;5;29m',
    'inverse': '\x1b[7m',
    'magenta': '\x1b[38;5;165m',
    'orange': '\x1b[38;5;166m',
    # `plain` is the same sequence restyle_digits emits after styled digits
    'plain': '\x1b[0m',
    'purple': '\x1b[38;5;99m',
    'red': '\x1b[31m',
    'underline': '\x1b[4m',

    # b-prefixed styles: each value pairs a `48;5;N` sequence (`41` for red),
    # using the same N as the plain color of the same name, with `38;5;15`;
    # going by aliases such as `blueback` and `bgblue`, these are presumably
    # colored backgrounds with white text: confirm against a terminal
    'bblue': '\x1b[48;5;26m\x1b[38;5;15m',
    'bgray': '\x1b[48;5;248m\x1b[38;5;15m',
    'bgreen': '\x1b[48;5;29m\x1b[38;5;15m',
    'bmagenta': '\x1b[48;5;165m\x1b[38;5;15m',
    'borange': '\x1b[48;5;166m\x1b[38;5;15m',
    'bpurple': '\x1b[48;5;99m\x1b[38;5;15m',
    'bred': '\x1b[41m\x1b[38;5;15m',
}
 181 
 182 
 183 def restyle_line(w, line: str, style: str) -> None:
 184     'Alternate styles for runs of digits in the string given.'
 185 
 186     start = 0
 187     end = len(line)
 188     if end > 1 and line[end - 2] == '\r' and line[end - 1] == '\n':
 189         end -= 2
 190     elif end > 0 and line[end - 1] == '\n':
 191         end -= 1
 192 
 193     while True:
 194         # see if line is over
 195         if start >= end:
 196             w.write('\n')
 197             return
 198 
 199         # find where the next run of digits starts; a negative index means
 200         # none were found
 201         i = -1
 202         for j in range(start, end):
 203             if line[j].isdigit():
 204                 i = j
 205                 break
 206 
 207         # check if rest of the line has no more digits
 208         if i < 0:
 209             w.write(line[start:end])
 210             w.write('\n')
 211             return
 212 
 213         # some ANSI-style sequences use 4-digit numbers, which are long
 214         # enough for this script to mangle
 215         is_ansi = i >= 2 and line[i-2] == '\x1b' and line[i-1] == '['
 216 
 217         # emit line up to right before the next run of digits starts
 218         w.write(line[start:i])
 219         start = i
 220 
 221         # find where/if the current run of digits ends; a negative index
 222         # means the run reaches the end of the line
 223         i = -1
 224         for j in range(start, end):
 225             if not line[j].isdigit():
 226                 i = j
 227                 break
 228 
 229         # check if rest of the line has only digits in it
 230         if i < 0:
 231             if not is_ansi:
 232                 restyle_digits(w, line, start, end, style)
 233             else:
 234                 w.write(line[start:end])
 235             w.write('\n')
 236             return
 237 
 238         # emit digits using alternate styling, and advance past them
 239         if not is_ansi:
 240             restyle_digits(w, line, start, i, style)
 241         else:
 242             w.write(line[start:i])
 243         start = i
 244 
 245 
 246 def restyle_digits(w, digits: str, start: int, end: int, style: str) -> None:
 247     'Alternate styles on 3-item chunks from the string given.'
 248 
 249     diff = end - start
 250 
 251     # it's overall quicker to just emit short-enough digit-runs verbatim
 252     if diff < 4:
 253         w.write(digits[start:end])
 254         return
 255 
 256     # emit leading chunk of digits, which is the only one which
 257     # can have fewer than 3 items
 258     lead = diff % 3
 259     w.write(digits[start:start + lead])
 260 
 261     # the rest of the sub-string now has a multiple of 3 items left
 262     start += lead
 263 
 264     # start by styling the next digit-group only if there was a
 265     # non-empty leading group at the start of the full digit-run
 266     use_style = lead > 0
 267 
 268     # alternate styles until the string is over
 269     while start < end:
 270         # the digits left are always a multiple of 3
 271         stop = start + 3
 272 
 273         if use_style:
 274             w.write(style)
 275             w.write(digits[start:stop])
 276             w.write('\x1b[0m')
 277         else:
 278             w.write(digits[start:stop])
 279 
 280         # switch style and advance to the next 3-digit chunk
 281         use_style = not use_style
 282         start = stop
 283 
 284 
 285 def seems_url(s: str) -> bool:
 286     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 287     return any(s.startswith(p) for p in protocols)
 288 
 289 
 290 def handle_lines(w, src, style: str) -> None:
 291     for line in src:
 292         restyle_line(w, line, style)
 293 
 294 
 295 args = argv[1:]
 296 style = names2styles['gray']
 297 
 298 # handle leading style/color option, if present
 299 if len(args) > 0 and args[0].startswith('-'):
 300     s = args[0].lstrip('-')
 301     if s in names_aliases:
 302         s = names_aliases[s]
 303 
 304     if s in names2styles:
 305         style = names2styles[s]
 306         # skip leading arg, since it's clearly not a filepath
 307         args = args[1:]
 308 
 309 if any(seems_url(e) for e in args):
 310     from urllib.request import urlopen
 311 
 312 try:
 313     if args.count('-') > 1:
 314         msg = 'reading from `-` (standard input) more than once not allowed'
 315         raise ValueError(msg)
 316 
 317     for path in args:
 318         if path == '-':
 319             handle_lines(stdout, stdin, style)
 320             continue
 321 
 322         if seems_url(path):
 323             with urlopen(path) as inp:
 324                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 325                     handle_lines(stdout, txt, style)
 326             continue
 327 
 328         with open(path, encoding='utf-8') as inp:
 329             handle_lines(stdout, inp, style)
 330 
 331     if len(args) == 0:
 332         handle_lines(stdout, stdin, style)
 333 except BrokenPipeError:
 334     # quit quietly, instead of showing a confusing error message
 335     stderr.close()
 336 except KeyboardInterrupt:
 337     exit(2)
 338 except Exception as e:
 339     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 340     exit(1)