File: nn.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # Notes
  27 #
  28 # String-slicing was a major source of inefficiencies in this script, making
  29 # it viable only for small inputs; it's not clear what the stdlib offers to
  30 # loop over sub-strings without copying data, which is really needed in this
  31 # case.
  32 #
  33 # String-slicing seems to be amortized in more recent versions of Python.
  34 #
  35 # In the end the code has become much uglier by using explicit index-pairs,
  36 # which are used/updated all over to avoid copying sub-strings.
  37 
  38 
  39 from io import SEEK_CUR
  40 from sys import argv, exit, stderr, stdin, stdout
  41 
  42 
# info is the help message: it's shown, stripped of its leading/trailing
# whitespace, when the first cmd-line argument is one of the help options
info = '''
nn [option...] [filepaths/URIs...]


Nice Numbers restyles all runs of 4+ digits by alternating ANSI-styles
every 3-digit group, so long numbers become easier to read at a glance.

All (optional) leading options start with either single or double-dash,
and most of them change the style/color used. Some of the options are,
shown in their single-dash form:

    -h          show this help message
    -help       show this help message

    -b          use a blue color
    -blue       use a blue color
    -bold       bold-style digits
    -g          use a green color
    -gray       use a gray color (default)
    -green      use a green color
    -hi         use a highlighting/inverse style
    -highlight  use a highlighting/inverse style
    -hilite     use a highlighting/inverse style
    -inverse    use a highlighting/inverse style
    -m          use a magenta color
    -magenta    use a magenta color
    -o          use an orange color
    -orange     use an orange color
    -p          use a purple color
    -purple     use a purple color
    -r          use a red color
    -red        use a red color
    -u          underline digits
    -underline  underline digits
'''
  78 
  79 # handle standard help cmd-line options, quitting right away in that case
  80 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  81     print(info.strip())
  82     exit(0)
  83 
  84 
# names_aliases normalizes lookup keys for table names2styles: each key is
# an alternative option name (looked up after stripping the leading dashes
# from the cmd-line option), and each value is the matching key in table
# names2styles
names_aliases = {
    # single-letter shortcuts
    'b': 'blue',
    'g': 'green',
    'm': 'magenta',
    'o': 'orange',
    'p': 'purple',
    'r': 'red',
    'u': 'underline',

    # 2-letter shortcuts for background colors, with a leading `b`
    'bb': 'blueback',
    'bg': 'greenback',
    'bm': 'magentaback',
    'bo': 'orangeback',
    'bp': 'purpleback',
    'br': 'redback',

    # 2-letter shortcuts for background colors, with a trailing `b`
    'gb': 'greenback',
    'mb': 'magentaback',
    'ob': 'orangeback',
    'pb': 'purpleback',
    'rb': 'redback',

    'hi': 'inverse',
    'inv': 'inverse',
    'mag': 'magenta',

    'flip': 'inverse',
    'swap': 'inverse',

    'reset': 'plain',
    'highlight': 'inverse',
    'hilite': 'inverse',
    'invert': 'inverse',
    'inverted': 'inverse',
    'swapped': 'inverse',

    # background colors, using a `b` prefix
    'bblue': 'blueback',
    'bgray': 'grayback',
    'bgreen': 'greenback',
    'bmagenta': 'magentaback',
    'borange': 'orangeback',
    'bpurple': 'purpleback',
    'bred': 'redback',

    # background colors, using a `bg` prefix
    'bgblue': 'blueback',
    'bggray': 'grayback',
    'bggreen': 'greenback',
    'bgmag': 'magentaback',
    'bgmagenta': 'magentaback',
    'bgorange': 'orangeback',
    'bgpurple': 'purpleback',
    'bgred': 'redback',

    # background colors, using a `bg` suffix
    'bluebg': 'blueback',
    'graybg': 'grayback',
    'greenbg': 'greenback',
    'magbg': 'magentaback',
    'magentabg': 'magentaback',
    'orangebg': 'orangeback',
    'purplebg': 'purpleback',
    'redbg': 'redback',

    # background colors, using a `back` prefix
    'backblue': 'blueback',
    'backgray': 'grayback',
    'backgreen': 'greenback',
    'backmag': 'magentaback',
    'backmagenta': 'magentaback',
    'backorange': 'orangeback',
    'backpurple': 'purpleback',
    'backred': 'redback',
}
 157 
# names2styles matches color/style names to their ANSI-style strings; the
# `gray` entry is the style used when no leading style option is given
names2styles = {
    # foreground colors and simple styles
    'blue': '\x1b[38;2;0;95;215m',
    'bold': '\x1b[1m',
    'gray': '\x1b[38;2;168;168;168m',
    'green': '\x1b[38;2;0;135;95m',
    'inverse': '\x1b[7m',
    'magenta': '\x1b[38;2;215;0;255m',
    'orange': '\x1b[38;2;215;95;0m',
    'plain': '\x1b[0m',
    'purple': '\x1b[38;2;135;95;255m',
    'red': '\x1b[38;2;204;0;0m',
    'underline': '\x1b[4m',

    # background colors: each of these pairs a `48;2;...` sequence with the
    # same `38;2;238;238;238` sequence
    'blueback': '\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m',
    'grayback': '\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m',
    'greenback': '\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m',
    'magentaback': '\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m',
    'orangeback': '\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m',
    'purpleback': '\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m',
    'redback': '\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m',
}
 180 
 181 
 182 def restyle_line(w, line: str, style: str) -> None:
 183     'Alternate styles for runs of digits in the string given.'
 184 
 185     start = 0
 186     end = len(line)
 187     if end > 1 and line[end - 2] == '\r' and line[end - 1] == '\n':
 188         end -= 2
 189     elif end > 0 and line[end - 1] == '\n':
 190         end -= 1
 191 
 192     while True:
 193         # see if line is over
 194         if start >= end:
 195             w.write('\n')
 196             return
 197 
 198         # find where the next run of digits starts, if present
 199         i = -1
 200         for j in range(start, end):
 201             if line[j].isdigit():
 202                 i = j
 203                 break
 204 
 205         # check if rest of the line has no more digits
 206         if i < 0:
 207             w.write(line[start:end])
 208             w.write('\n')
 209             return
 210 
 211         # some ANSI-style sequences use 4-digit numbers, which are long
 212         # enough for this script to mangle
 213         is_ansi = i >= 2 and line[i-2] == '\x1b' and line[i-1] == '['
 214 
 215         # emit line up to right before the next run of digits starts
 216         w.write(line[start:i])
 217         start = i
 218 
 219         # find where/if the current run of digits ends
 220         i = -1
 221         for j in range(start, end):
 222             if not line[j].isdigit():
 223                 i = j
 224                 break
 225 
 226         # check if rest of the line has only digits in it
 227         if i < 0:
 228             if not is_ansi:
 229                 restyle_digits(w, line, start, end, style)
 230             else:
 231                 w.write(line[start:end])
 232             w.write('\n')
 233             return
 234 
 235         # emit digits using alternate styling, and advance past them
 236         if not is_ansi:
 237             restyle_digits(w, line, start, i, style)
 238         else:
 239             w.write(line[start:i])
 240         start = i
 241 
 242 
 243 def restyle_digits(w, digits: str, start: int, end: int, style: str) -> None:
 244     'Alternate styles on 3-item chunks from the string given.'
 245 
 246     diff = end - start
 247 
 248     # it's overall quicker to just emit short-enough digit-runs verbatim
 249     if diff < 4:
 250         w.write(digits[start:end])
 251         return
 252 
 253     # emit leading chunk of digits, which is the only one which
 254     # can have fewer than 3 items
 255     lead = diff % 3
 256     w.write(digits[start:start + lead])
 257 
 258     # the rest of the sub-string now has a multiple of 3 items left
 259     start += lead
 260 
 261     # start by styling the next digit-group only if there was a
 262     # non-empty leading group at the start of the full digit-run
 263     use_style = lead > 0
 264 
 265     # alternate styles until the string is over
 266     while start < end:
 267         # the digits left are always a multiple of 3
 268         stop = start + 3
 269 
 270         if use_style:
 271             w.write(style)
 272             w.write(digits[start:stop])
 273             w.write('\x1b[0m')
 274         else:
 275             w.write(digits[start:stop])
 276 
 277         # switch style and advance to the next 3-digit chunk
 278         use_style = not use_style
 279         start = stop
 280 
 281 
 282 def seems_url(s: str) -> bool:
 283     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 284     return any(s.startswith(p) for p in protocols)
 285 
 286 
 287 def handle_lines(w, src, style: str, live: bool) -> None:
 288     for line in src:
 289         restyle_line(w, line, style, live)
 290         if live:
 291             w.flush()
 292 
 293 
 294 args = argv[1:]
 295 style = names2styles['gray']
 296 
 297 # handle leading style/color option, if present
 298 if len(args) > 0 and args[0].startswith('-'):
 299     s = args[0].lstrip('-')
 300     if s in names_aliases:
 301         s = names_aliases[s]
 302 
 303     if s in names2styles:
 304         style = names2styles[s]
 305         args = args[1:]
 306 
 307 if any(seems_url(e) for e in args):
 308     from io import TextIOWrapper
 309     from urllib.request import urlopen
 310 
 311 try:
 312     if args.count('-') > 1:
 313         msg = 'reading from `-` (standard input) more than once not allowed'
 314         raise ValueError(msg)
 315 
 316     try:
 317         stdout.seek(0, SEEK_CUR)
 318         live = False
 319     except:
 320         live = True
 321 
 322     for path in args:
 323         if path == '-':
 324             handle_lines(stdout, stdin, style, live)
 325             continue
 326 
 327         if seems_url(path):
 328             with urlopen(path) as inp:
 329                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 330                     handle_lines(stdout, txt, style, live)
 331             continue
 332 
 333         with open(path, encoding='utf-8') as inp:
 334             handle_lines(stdout, inp, style, live)
 335 
 336     if len(args) == 0:
 337         handle_lines(stdout, stdin, style, live)
 338 except BrokenPipeError:
 339     # quit quietly, instead of showing a confusing error message
 340     stderr.close()
 341     exit(0)
 342 except KeyboardInterrupt:
 343     exit(2)
 344 except Exception as e:
 345     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 346     exit(1)