File: tj.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # tj [options...] [python expression] [filepath/URI...]
  27 #
  28 # Transform Json loads JSON data, runs a Python expression on it, and emits
  29 # the result as JSON. Input data are available to the expression as variables
  30 # named `v`, `d`, and `data`.
  31 #
  32 # If no file is given, it loads JSON data from its standard input.
  33 #
  34 # Options, where leading double-dashes are also allowed:
  35 #
  36 #   -c         compact JSON output (JSON-0), without unneeded spaces
  37 #   -compact   same as -c
  38 #   -j0        same as -c
  39 #   -json0     same as -c
  40 #   -json-0    same as -c
  41 #
  42 #   -h         show this help message
  43 #   -help      same as -h
  44 #
  45 #   -n         No input / load Nothing
  46 #   -nil       same as -n
  47 #   -none      same as -n
  48 #   -null      same as -n
  49 
  50 
  51 from base64 import \
  52     standard_b64encode as base64bytes, standard_b64decode as debase64bytes
  53 
  54 from datetime import \
  55     MAXYEAR, MINYEAR, date, datetime, time, timedelta, timezone, tzinfo
  56 try:
  57     from datetime import UTC
  58 except:
  59     pass
  60 
  61 from functools import \
  62     cache, cached_property, cmp_to_key, get_cache_token, lru_cache, \
  63     namedtuple, partial, partialmethod, recursive_repr, reduce, \
  64     singledispatch, singledispatchmethod, total_ordering, update_wrapper, \
  65     wraps
  66 
  67 from itertools import \
  68     accumulate, chain, combinations, combinations_with_replacement, \
  69     compress, count, cycle, dropwhile, filterfalse, groupby, islice, \
  70     permutations, product, repeat, starmap, takewhile, tee, zip_longest
  71 try:
  72     from itertools import pairwise
  73 except:
  74     pass
  75 
  76 from json import dump, dumps, load, loads
  77 
  78 import math
  79 from math import \
  80     acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
  81     copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
  82     fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
  83     isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
  84     log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
  85     radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
  86 try:
  87     from math import cbrt, exp2
  88 except:
  89     pass
  90 
  91 from random import \
  92     betavariate, choice, choices, expovariate, gammavariate, gauss, \
  93     getrandbits, getstate, lognormvariate, normalvariate, paretovariate, \
  94     randbytes, randint, random, randrange, sample, seed, setstate, \
  95     shuffle, triangular, uniform, vonmisesvariate, weibullvariate
  96 
  97 from re import compile as compile_uncached, Pattern
  98 
  99 from statistics import \
 100     bisect_left, bisect_right, fmean, \
 101     geometric_mean, harmonic_mean, mean, median, \
 102     median_grouped, median_high, median_low, mode, multimode, pstdev, \
 103     pvariance, quantiles, stdev, variance
 104 try:
 105     from statistics import \
 106         correlation, covariance, linear_regression, mul, reduce
 107 except:
 108     pass
 109 
 110 from string import \
 111     Formatter, Template, ascii_letters, ascii_lowercase, ascii_uppercase, \
 112     capwords, digits, hexdigits, octdigits, printable, punctuation, \
 113     whitespace
 114 
 115 from sys import argv, exit, stderr, stdin, stdout
 116 
 117 from textwrap import dedent, fill, indent, shorten, wrap
 118 
 119 from time import \
 120     altzone, asctime, \
 121     ctime, daylight, get_clock_info, \
 122     gmtime, localtime, mktime, monotonic, monotonic_ns, perf_counter, \
 123     perf_counter_ns, process_time, process_time_ns, \
 124     sleep, strftime, strptime, struct_time, thread_time, thread_time_ns, \
 125     time, time_ns, timezone, tzname
 126 try:
 127     from time import \
 128         clock_getres, clock_gettime, clock_gettime_ns, clock_settime, \
 129         clock_settime_ns, pthread_getcpuclockid, tzset
 130 except:
 131     pass
 132 
 133 # some defined funcs exposed to formulas use type declarations
 134 from typing import Any, Iterable, List
 135 
 136 from unicodedata import \
 137     bidirectional, category, combining, decimal, decomposition, digit, \
 138     east_asian_width, is_normalized, lookup, mirrored, name, normalize, \
 139     numeric
 140 
 141 from urllib.request import urlopen
 142 
 143 
# info is the help message, written to stderr in 2 situations: when the
# script isn't given any argument or its leading argument is one of the
# standard cmd-line help options, and later on when the arguments given
# don't include any expression to run; surrounding empty lines from the
# literal are stripped
info = '''
tj [options...] [python expression] [filepath/URI...]

Transform Json loads JSON data, runs a Python expression on it, and emits
the result as JSON. Input data are available to the expression as variables
named `v`, `d`, and `data`.

If no file is given, it loads JSON data from its standard input.

Options, where leading double-dashes are also allowed:

    -c          compact JSON output (JSON-0), without unneeded spaces
    -compact    same as -c
    -j0         same as -c
    -json0      same as -c
    -json-0     same as -c

    -h          show this help message
    -help       same as -h

    -n          No input / load Nothing
    -nil        same as -n
    -none       same as -n
    -null       same as -n
'''.strip()
 171 
 172 
 173 # no args or a leading help-option arg means show the help message and quit
 174 if len(argv) < 2 or argv[1].lower() in ('-h', '--h', '-help', '--help'):
 175     print(info, file=stderr)
 176     exit(0)
 177 
 178 
 179 # re_cache is used by custom func compile to cache previously-compiled
 180 # regular-expressions, which makes them quicker to (re)use in formulas
 181 re_cache = {}
 182 
 183 # paddable_tab_re detects single tabs and possible runs of spaces around
 184 # them, and is used in func squeeze
 185 paddable_tab_re = compile_uncached(' *\t *')
 186 
 187 # spaces_re detects runs of 2 or more spaces, and is used in func squeeze
 188 spaces_re = compile_uncached('  +')
 189 
 190 
 191 # some convenience aliases to commonly-used values
 192 true = True
 193 false = False
 194 nil = None
 195 none = None
 196 null = None
 197 block = ''
 198 cdot = '·'
 199 colon = ':'
 200 comma = ','
 201 crlf = '\r\n'
 202 dot = '.'
 203 empty = ''
 204 lf = '\n'
 205 mdot = '·'
 206 semicolon = ';'
 207 space = ' '
 208 tab = '\t'
 209 utf8bom = '\xef\xbb\xbf'
 210 
 211 # some occasionally-useful values
 212 kb = 1024
 213 mb = 1024 * kb
 214 gb = 1024 * mb
 215 tb = 1024 * gb
 216 pb = 1024 * tb
 217 
 218 months = [
 219     'January', 'February', 'March', 'April', 'May', 'June',
 220     'July', 'August', 'September', 'October', 'November', 'December',
 221 ]
 222 
 223 monweek = [
 224     'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
 225     'Saturday', 'Sunday',
 226 ]
 227 
 228 sunweek = [
 229     'Sunday',
 230     'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
 231 ]
 232 
 233 # some convenience aliases to various funcs from the python stdlib
 234 geomean = geometric_mean
 235 harmean = harmonic_mean
 236 sd = stdev
 237 popsd = pstdev
 238 var = variance
 239 popvar = pvariance
 240 randbeta = betavariate
 241 randexp = expovariate
 242 randgamma = gammavariate
 243 randlognorm = lognormvariate
 244 randnorm = normalvariate
 245 randweibull = weibullvariate
 246 
 247 
 248 def dedup(v: Iterable) -> List:
 249     '''Ignore reappearing items from iterables: result is always a list'''
 250 
 251     got = set()
 252     dedup = []
 253     for e in v:
 254         if not e in got:
 255             got.add(e)
 256             dedup.append(e)
 257     return dedup
 258 
 259 unique = dedup
 260 
 261 def fix(x: Any, repl: Any = None) -> Any:
 262     '''Make values JSON-compatible'''
 263 
 264     if x == None:
 265         return x
 266     elif isinstance(x, bool) or isinstance(x, int) or isinstance(x, str):
 267         return x
 268     elif isinstance(x, float):
 269         # turn NaNs and Infinities into the replacement value given
 270         return x if not (isnan(x) or isinf(x)) else repl
 271     elif isinstance(x, list):
 272         return [fix(e) for e in x]
 273     elif isinstance(x, dict):
 274         return { k: fix(e) for k, e in x.items() }
 275     elif isinstance(x, Iterable):
 276         return [fix(e) for e in x]
 277     else:
 278         return str(x)
 279 
 280 def after(s: str, *what: str) -> str:
 281     for t in what:
 282         i = s.find(t)
 283         s = '' if i < 0 else s[i+len(t):]
 284     return s
 285 
 286 def after_last(s: str, *what: str) -> str:
 287     for t in what:
 288         i = s.rfind(t)
 289         s = '' if i < 0 else s[i+len(t):]
 290     return s
 291 
 292 afterlast = after_last
 293 
 294 def before(s: str, *what: str) -> str:
 295     for t in what:
 296         i = s.find(t)
 297         s = s if i < 0 else s[:i]
 298     return s
 299 
 300 def before_last(s: str, *what: str) -> str:
 301     for t in what:
 302         i = s.rfind(t)
 303         s = s if i < 0 else s[:i]
 304     return s
 305 
 306 beforelast = before_last
 307 
 308 def since(s: str, *what: str) -> str:
 309     for t in what:
 310         i = s.find(t)
 311         s = '' if i < 0 else s[i:]
 312     return s
 313 
 314 def since_last(s: str, *what: str) -> str:
 315     for t in what:
 316         i = s.rfind(t)
 317         s = '' if i < 0 else s[i:]
 318     return s
 319 
 320 sincelast = since_last
 321 
 322 def until(s: str, *what: str) -> str:
 323     for t in what:
 324         i = s.find(t)
 325         s = s if i < 0 else s[:i+len(t)]
 326     return s
 327 
 328 def now() -> datetime:
 329     return datetime.now()
 330 
 331 def now_dict() -> dict:
 332     v = datetime.now()
 333     return {
 334         'year': v.year,
 335         'month': v.month,
 336         'day': v.day,
 337         'hour': v.hour,
 338         'minute': v.minute,
 339         'second': v.second,
 340         'text': v.strftime('%Y-%m-%d %H:%M:%S %b %a'),
 341         'weekday': v.strftime('%A'),
 342     }
 343 
 344 nowdict = now_dict
 345 
 346 def gsub(s: str, what: str, repl: str) -> str:
 347     '''Replace all regex-matches with the string given'''
 348     return compile(what).sub(repl, s)
 349 
 350 def base64(x):
 351     return base64bytes(str(x).encode()).decode()
 352 
 353 def debase64(x):
 354     return debase64bytes(str(x).encode()).decode()
 355 
 356 def compile(s: str) -> Pattern:
 357     '''Cached regex `compiler`, so it's quicker to (re)use in formulas'''
 358 
 359     if s in re_cache:
 360         return re_cache[s]
 361     e = compile_uncached(s)
 362     re_cache[s] = e
 363     return e
 364 
 365 def squeeze(s: str) -> str:
 366     '''
 367     A more aggressive way to rid strings of extra spaces which,
 368     unlike string method strip, also squeezes inner runs of
 369     multiple spaces into single spaces
 370     '''
 371 
 372     s = s.strip()
 373     s = spaces_re.sub(' ', s)
 374     s = paddable_tab_re.sub('\t', s)
 375     return s
 376 
 377 def float_or(s: str, default: Any = None) -> Any:
 378     try:
 379         return float(s)
 380     except:
 381         return default
 382 
 383 floator = float_or
 384 
 385 
 386 def seems_url(s: str) -> bool:
 387     for prot in ('https://', 'http://', 'file://', 'ftp://', 'data:'):
 388         if s.startswith(prot):
 389             return True
 390     return False
 391 
 392 
 393 # handle cmd-line arguments
 394 args = argv[1:]
 395 load_input = True
 396 compact_output = False
 397 name = ''
 398 expression = None
 399 
 400 for e in args:
 401     l = e.lower()
 402     if l in ('-c', '--c', '-n', '--n', '-nil', '--nil', '-none', '--none',
 403         '-null', '--null'):
 404         load_input = False
 405     elif l in ('-c', '--c', '-compact', '--compact', '-j0', '--j0',
 406         '-json0', '--json0', '-json-0', '--json-0'):
 407         compact_output = True
 408     elif expression == None:
 409         expression = e
 410     elif name == '':
 411         name = e
 412     else:
 413         print('\x1b[31mmultiple inputs not allowed\x1b[0m', file=stderr)
 414         exit(1)
 415 
# main part of the script: load the input, run the expression on it, and
# emit the result as JSON; all names assigned here are module-level ones,
# which makes them visible to the expression being evaluated
try:
    # no expression to run means show the help message and quit: func exit
    # raises SystemExit, which the generic handler at the end doesn't catch
    if expression == None:
        print(info, file=stderr)
        exit(0)

    # when not in `no-input mode`, load JSON into variable `v`
    v = None
    if load_input:
        if name == '' or name == '-':
            # no name or a dash means read from standard input
            stdin.reconfigure(encoding='utf-8')
            v = load(stdin)
        elif seems_url(name):
            with urlopen(name) as inp:
                v = load(inp)
        else:
            with open(name, encoding='utf-8') as inp:
                v = load(inp)

    # offer several aliases for main variable `v`; the intuitive
    # `in` (short for `input`) is a keyword, so it's not available
    d = v
    data = v
    value = v
    # input = v

    # auto-parse main value to floating-point, for convenience: values
    # which can't be converted result in a NaN
    f = 0.0
    try:
        f = float(v)
    except:
        f = nan

    # prevent formulas from opening files, and similar other actions
    # NOTE(review): this only rebinds these few module-level names, so it's
    # a convenience guard rather than a sandbox; expressions are still run
    # with full access to everything else via eval
    exec = None
    load = None
    open = None
    stdin = None
    urlopen = None

    # transform data using the formula/expression given: handle a dot as
    # an identity operation, evaluate anything else
    v = v if expression == '.' else eval(expression)
    # make the result JSON-compatible: for example, NaNs and Infinities
    # turn into nulls, which `allow_nan=False` below relies on
    v = fix(v)

    # import/define only after calling eval, further minimizing names
    # available to formulas being run
    sep = (',', ':') if compact_output else (', ', ': ')
    ind = None if compact_output else 2

    # emit result as JSON
    stdout.reconfigure(newline='\n', encoding='utf-8')
    dump(v, stdout, indent=ind, separators=sep, allow_nan=False)
    stdout.write('\n')
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.flush()
    stderr.close()
except KeyboardInterrupt:
    # quit quietly, instead of showing a confusing error message
    stderr.flush()
    stderr.close()
    exit(2)
except Exception as e:
    # show any other error message in red, and quit with a failure code
    print(f'\x1b[31m{e}\x1b[0m', file=stderr)
    exit(1)
 480     exit(1)