File: tl.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # tl [options...] [python expressions...]
  27 #
  28 # Transform Lines runs Python expressions on each line from standard input:
  29 # each expression given emits its result as its own line, so input lines are
# `amplified` when using multiple formulas, so to speak.
  31 #
  32 # When a formula's result is None/null, it emits no output line.
  33 #
  34 # Each input line is available to the expression as variables named `l`,
  35 # `line`, `s`, `v`, `value`, `d`, and `data`.
  36 #
  37 # Each input line is also parsed into a floating-point number named `f`,
  38 # which is NaN when the line can't parse into a number.
  39 #
  40 # Options, where leading double-dashes are also allowed:
  41 #
  42 #   -h         show this help message
  43 #   -help      same as -h
  44 #
  45 #   -jsonl     transform JSON Lines into proper JSON, without formulas
  46 #
  47 #   -n         No input / load Nothing
  48 #   -nil       same as -n
  49 #   -none      same as -n
  50 #   -null      same as -n
  51 
  52 
  53 from base64 import \
  54     standard_b64encode as base64bytes, standard_b64decode as debase64bytes
  55 
  56 from datetime import \
  57     MAXYEAR, MINYEAR, date, datetime, time, timedelta, timezone, tzinfo
  58 try:
  59     from datetime import UTC
  60 except:
  61     pass
  62 
  63 from functools import \
  64     cache, cached_property, cmp_to_key, get_cache_token, lru_cache, \
  65     namedtuple, partial, partialmethod, recursive_repr, reduce, \
  66     singledispatch, singledispatchmethod, total_ordering, update_wrapper, \
  67     wraps
  68 
  69 from itertools import \
  70     accumulate, chain, combinations, combinations_with_replacement, \
  71     compress, count, cycle, dropwhile, filterfalse, groupby, islice, \
  72     permutations, product, repeat, starmap, takewhile, tee, zip_longest
  73 try:
  74     from itertools import pairwise
  75 except:
  76     pass
  77 
  78 from json import dump, dumps, loads
  79 
  80 import math
  81 from math import \
  82     acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
  83     copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
  84     fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
  85     isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
  86     log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
  87     radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
  88 try:
  89     from math import cbrt, exp2
  90 except:
  91     pass
  92 
  93 from random import \
  94     betavariate, choice, choices, expovariate, gammavariate, gauss, \
  95     getrandbits, getstate, lognormvariate, normalvariate, paretovariate, \
  96     randbytes, randint, random, randrange, sample, seed, setstate, \
  97     shuffle, triangular, uniform, vonmisesvariate, weibullvariate
  98 
  99 compile_py = compile # keep built-in func compile for later
 100 from re import compile as compile_uncached, Pattern
 101 
 102 from statistics import \
 103     bisect_left, bisect_right, fmean, \
 104     geometric_mean, harmonic_mean, mean, median, \
 105     median_grouped, median_high, median_low, mode, multimode, pstdev, \
 106     pvariance, quantiles, stdev, variance
 107 try:
 108     from statistics import \
 109         correlation, covariance, linear_regression, mul, reduce
 110 except:
 111     pass
 112 
 113 from string import \
 114     Formatter, Template, ascii_letters, ascii_lowercase, ascii_uppercase, \
 115     capwords, digits, hexdigits, octdigits, printable, punctuation, \
 116     whitespace
 117 
 118 from sys import argv, stdin, stdout
 119 
 120 from textwrap import dedent, fill, indent, shorten, wrap
 121 
 122 from time import \
 123     altzone, asctime, \
 124     ctime, daylight, get_clock_info, \
 125     gmtime, localtime, mktime, monotonic, monotonic_ns, perf_counter, \
 126     perf_counter_ns, process_time, process_time_ns, \
 127     sleep, strftime, strptime, struct_time, thread_time, thread_time_ns, \
 128     time, time_ns, timezone, tzname
 129 try:
 130     from time import \
 131         clock_getres, clock_gettime, clock_gettime_ns, clock_settime, \
 132         clock_settime_ns, pthread_getcpuclockid, tzset
 133 except:
 134     pass
 135 
 136 # some defined funcs exposed to formulas use type declarations
 137 from typing import Any, Iterable, List
 138 
 139 from unicodedata import \
 140     bidirectional, category, combining, decimal, decomposition, digit, \
 141     east_asian_width, is_normalized, lookup, mirrored, name, normalize, \
 142     numeric
 143 
 144 
 145 # info is the message shown when the script isn't given any argument, or
 146 # when the leading argument is one of the standard cmd-line help options
 147 info = '''
 148 tl [options...] [python expressions...]
 149 
 150 Transform Lines runs Python expressions on each line from standard input:
 151 each expression given emits its result as its own line, so input lines are
 152 `amplified` when using multipe formulas, so to speak.
 153 
 154 When a formula's result is None/null, it emits no output line.
 155 
 156 Each input line is available to the expression as variables named `l`,
 157 `line`, `s`, `v`, `value`, `d`, and `data`.
 158 
 159 Each input line is also parsed into a floating-point number named `f`,
 160 which is NaN when the line can't parse into a number.
 161 
 162 Options, where leading double-dashes are also allowed:
 163 
 164     -h          show this help message
 165     -help       same as -h
 166 
 167     -jsonl     transform JSON Lines into proper JSON, without formulas
 168 
 169     -n          No input / load Nothing
 170     -nil        same as -n
 171     -none       same as -n
 172     -null       same as -n
 173 '''.strip()
 174 
 175 
 176 # no args or a leading help-option arg means show the help message and quit
 177 if len(argv) < 2 or argv[1].lower() in ('-h', '--h', '-help', '--help'):
 178     from sys import exit, stderr
 179     print(info, file=stderr)
 180     exit(0)
 181 
 182 
 183 # re_cache is used by custom func compile to cache previously-compiled
 184 # regular-expressions, which makes them quicker to (re)use in formulas
 185 re_cache = {}
 186 
 187 # ansi_style_re detects the most commonly-used ANSI-style sequences, and
 188 # is used in func plain
 189 ansi_style_re = compile_uncached('\x1b\[([0-9;]+m|[0-9]*[A-HJKST])')
 190 
 191 # paddable_tab_re detects single tabs and possible runs of spaces around
 192 # them, and is used in func squeeze
 193 paddable_tab_re = compile_uncached(' *\t *')
 194 
 195 # spaces_re detects runs of 2 or more spaces, and is used in func squeeze
 196 spaces_re = compile_uncached('  +')
 197 
 198 
 199 # some convenience aliases to commonly-used values
 200 true = True
 201 false = False
 202 nil = None
 203 none = None
 204 null = None
 205 block = ''
 206 cdot = '·'
 207 colon = ':'
 208 comma = ','
 209 crlf = '\r\n'
 210 dot = '.'
 211 empty = ''
 212 lf = '\n'
 213 mdot = '·'
 214 semicolon = ';'
 215 space = ' '
 216 tab = '\t'
 217 utf8bom = '\xef\xbb\xbf'
 218 
 219 # some occasionally-useful values
 220 kb = 1024
 221 mb = 1024 * kb
 222 gb = 1024 * mb
 223 tb = 1024 * gb
 224 pb = 1024 * tb
 225 
 226 months = [
 227     'January', 'February', 'March', 'April', 'May', 'June',
 228     'July', 'August', 'September', 'October', 'November', 'December',
 229 ]
 230 
 231 monweek = [
 232     'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
 233     'Saturday', 'Sunday',
 234 ]
 235 
 236 sunweek = [
 237     'Sunday',
 238     'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
 239 ]
 240 
 241 # some convenience aliases to various funcs from the python stdlib
 242 geomean = geometric_mean
 243 harmean = harmonic_mean
 244 sd = stdev
 245 popsd = pstdev
 246 var = variance
 247 popvar = pvariance
 248 randbeta = betavariate
 249 randexp = expovariate
 250 randgamma = gammavariate
 251 randlognorm = lognormvariate
 252 randnorm = normalvariate
 253 randweibull = weibullvariate
 254 
 255 
 256 def jsonl2json(w, src) -> None:
 257     '''Turn JSON Lines read from the source given into proper JSON.'''
 258 
 259     i = 0
 260     w.write('[')
 261     for line in src:
 262         if i > 0:
 263             w.write(',\n  ')
 264         else:
 265             w.write('\n  ')
 266         dump(line.rstrip('\r\n').rstrip('\n'), w)
 267         i += 1
 268 
 269     if i == 0:
 270         w.write(']\n')
 271     else:
 272         w.write('\n]\n')
 273 
 274 
 275 def run(expressions: List[str], line: str) -> None:
 276     '''Run all expressions given on each line of the input-source given'''
 277 
 278     exec = None
 279     # prevent expressions from opening files
 280     open = None
 281     stdin = None
 282     stdout = None
 283 
 284     # give the formulas various aliases for the current-line value
 285     d = line
 286     data = line
 287     v = line
 288     value = line
 289     # input = line
 290     l = line
 291     s = line
 292 
 293     f = 0.0
 294     try:
 295         f = float(v)
 296     except:
 297         f = nan
 298 
 299     prev = None
 300     for expr in expressions:
 301         prev = eval(expr)
 302         if prev == None:
 303             continue
 304 
 305         if isinstance(prev, Iterable) and not isinstance(prev, str):
 306             for e in prev:
 307                 print(e)
 308         elif isinstance(prev, dict):
 309             for k, v in prev.items():
 310                 print(f'{k}\t{v}')
 311         else:
 312             print(prev)
 313 
 314 
 315 def dedup(v: Iterable) -> List:
 316     '''Ignore reappearing items from iterables: result is always a list'''
 317 
 318     got = set()
 319     dedup = []
 320     for e in v:
 321         if not e in got:
 322             got.add(e)
 323             dedup.append(e)
 324     return dedup
 325 
 326 unique = dedup
 327 
 328 def fix(x: Any, repl: Any = None) -> Any:
 329     '''Make values JSON-compatible'''
 330 
 331     if x == None:
 332         return x
 333     elif isinstance(x, bool) or isinstance(x, int) or isinstance(x, str):
 334         return x
 335     elif isinstance(x, float):
 336         # turn NaNs and Infinities into nulls, or the replacement value given
 337         return x if not (isnan(x) or isinf(x)) else repl
 338     elif isinstance(x, list):
 339         return [fix(e) for e in x]
 340     elif isinstance(x, dict):
 341         return { k: fix(e) for k, e in x.items() }
 342     else:
 343         return str(x)
 344 
 345 def after(s: str, *what: str) -> str:
 346     for t in what:
 347         i = s.find(t)
 348         s = '' if i < 0 else s[i+len(t):]
 349     return s
 350 
 351 def after_last(s: str, *what: str) -> str:
 352     for t in what:
 353         i = s.rfind(t)
 354         s = '' if i < 0 else s[i+len(t):]
 355     return s
 356 
 357 afterlast = after_last
 358 
 359 def before(s: str, *what: str) -> str:
 360     for t in what:
 361         i = s.find(t)
 362         s = s if i < 0 else s[:i]
 363     return s
 364 
 365 def before_last(s: str, *what: str) -> str:
 366     for t in what:
 367         i = s.rfind(t)
 368         s = s if i < 0 else s[:i]
 369     return s
 370 
 371 beforelast = before_last
 372 
 373 def since(s: str, *what: str) -> str:
 374     for t in what:
 375         i = s.find(t)
 376         s = '' if i < 0 else s[i:]
 377     return s
 378 
 379 def since_last(s: str, *what: str) -> str:
 380     for t in what:
 381         i = s.rfind(t)
 382         s = '' if i < 0 else s[i:]
 383     return s
 384 
 385 sincelast = since_last
 386 
 387 def until(s: str, *what: str) -> str:
 388     for t in what:
 389         i = s.find(t)
 390         s = s if i < 0 else s[:i+len(t)]
 391     return s
 392 
 393 def skip_empty(s: str) -> Any:
 394     return s if s != '' else None
 395 
 396 skipempty = skip_empty
 397 
 398 def now() -> datetime:
 399     return datetime.now()
 400 
 401 def gsub(s: str, what: str, repl: str) -> str:
 402     '''Replace all regex-matches with the string given'''
 403     return compile(what).sub(repl, s)
 404 
 405 def base64(x):
 406     return base64bytes(str(x).encode()).decode()
 407 
 408 def debase64(x):
 409     return debase64bytes(str(x).encode()).decode()
 410 
 411 def compile(s: str) -> Pattern:
 412     '''Cached regex `compiler`, so it's quicker to (re)use in formulas'''
 413 
 414     if s in re_cache:
 415         return re_cache[s]
 416     e = compile_uncached(s)
 417     re_cache[s] = e
 418     return e
 419 
 420 def squeeze(s: str) -> str:
 421     '''
 422     A more aggressive way to rid strings of extra spaces which,
 423     unlike string method strip, also squeezes inner runs of
 424     multiple spaces into single spaces
 425     '''
 426 
 427     s = s.strip()
 428     s = spaces_re.sub(' ', s)
 429     s = paddable_tab_re.sub('\t', s)
 430     return s
 431 
 432 def float_or(s: str, default: Any = nan) -> Any:
 433     try:
 434         return float(s)
 435     except:
 436         return default
 437 
 438 floator = float_or
 439 
 440 def plain(s: str) -> str:
 441     return ansi_style_re.sub('', s)
 442 
 443 def blue(s: Any) -> str:
 444     return f'\x1b[38;5;26m{s}\x1b[0m'
 445 
 446 def bold(s: Any) -> str:
 447     return f'\x1b[1m{s}\x1b[0m'
 448 
 449 def gray(s: Any) -> str:
 450     return f'\x1b[38;5;249m{s}\x1b[0m'
 451 
 452 def green(s: Any) -> str:
 453     return f'\x1b[38;5;29m{s}\x1b[0m'
 454 
 455 def highlight(s: Any) -> str:
 456     return f'\x1b[7m{s}\x1b[0m'
 457 
 458 hilite = highlight
 459 
 460 def magenta(s: Any) -> str:
 461     return f'\x1b[38;5;165m{s}\x1b[0m'
 462 
 463 def orange(s: Any) -> str:
 464     return f'\x1b[38;5;166m{s}\x1b[0m'
 465 
 466 def purple(s: Any) -> str:
 467     return f'\x1b[38;5;99m{s}\x1b[0m'
 468 
 469 def red(s: Any) -> str:
 470     return f'\x1b[38;5;1m{s}\x1b[0m'
 471 
 472 def underline(s: Any) -> str:
 473     return f'\x1b[4m{s}\x1b[0m'
 474 
 475 
 476 
 477 # args is the `proper` list of arguments given to the script
 478 args = argv[1:]
 479 use_input = True
 480 jsonl = False
 481 expressions = args
 482 
 483 if len(args) == 0:
 484     # show help message when given no arguments
 485     from sys import exit, stderr
 486     print(info, file=stderr)
 487     exit(0)
 488 else:
 489     # handle all other leading options; the explicit help options are
 490     # handled earlier in the script
 491     l = args[0].lower()
 492     if l in (
 493         '-c', '--c', '-n', '--n', '-nil', '--nil', '-none', '--none',
 494         '-null', '--null'):
 495         use_input = False
 496         expressions = args[1:]
 497     elif l in (
 498         '-jsonl', '--jsonl', '-jsonlines', '--jsonlines', '-json-lines',
 499         '--json-lines'):
 500         # enable JSONL mode, and empty the expressions list, to avoid
 501         # possible compilation errors later
 502         jsonl = True
 503         expressions = []
 504 
 505 try:
 506     stdout.reconfigure(newline='\n', encoding='utf-8')
 507 
 508     # compile all expressions to speed them up, since they're all (re)run
 509     # for each line from standard input; also, handle single-dot formulas
 510     # as identity expressions, using the current line as is
 511     expressions = [e if e != '.' else 'line' for e in expressions]
 512     expressions = [compile_py(e, '<string>', 'eval') for e in expressions]
 513     if len(expressions) == 0 and not jsonl:
 514         exit(0)
 515 
 516     if jsonl:
 517         # handle stdin lines as JSONL
 518         stdin.reconfigure(encoding='utf-8')
 519         jsonl2json(stdout, stdin)
 520     elif use_input:
 521         # handle stdin lines with the formulas given
 522         stdin.reconfigure(encoding='utf-8')
 523 
 524         i = 0
 525         for line in stdin:
 526             n = i + 1
 527             nr = i + 1
 528             run(expressions, line.rstrip('\r\n').rstrip('\n'))
 529             i += 1
 530     else:
 531         # run formulas once, with no input
 532         stdin = None
 533         stdout = None
 534         run(expressions, '')
 535 except BrokenPipeError:
 536     # quit quietly, instead of showing a confusing error message
 537     from sys import stderr
 538     stderr.flush()
 539     stderr.close()
 540 except KeyboardInterrupt:
 541     # quit quietly, instead of showing a confusing error message
 542     from sys import exit, stderr
 543     stderr.flush()
 544     stderr.close()
 545     exit(2)
 546 except Exception as e:
 547     from sys import exit, stderr
 548     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 549     exit(1)