#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# tj [options...] [python expression] [filepath/URI...]
#
# Transform Json loads JSON data, runs a Python expression on it, and emits
# the result as JSON. Input data are available to the expression as variables
# named `v`, `d`, and `data`.
#
# If no file is given, it loads JSON data from its standard input.
#
# Options, where leading double-dashes are also allowed:
#
#   -c        compact JSON output (JSON-0), without unneeded spaces
#   -compact  same as -c
#   -j0       same as -c
#   -json0    same as -c
#   -json-0   same as -c
#
#   -h        show this help message
#   -help     same as -h
#
#   -n        No input / load Nothing
#   -nil      same as -n
#   -none     same as -n
#   -null     same as -n


# Most of the names imported below aren't used by the script itself: they
# are brought into the module's namespace so the expression given on the
# cmd-line, which is evaluated in that same namespace, can use them directly.
#
# Names which only exist in newer python versions, or only on some
# platforms, are imported inside try-blocks which ignore ImportError, so
# the script still starts without them.

from base64 import \
    standard_b64encode as base64bytes, standard_b64decode as debase64bytes

# NOTE: names `time` and `timezone` imported here are rebound further down by
# `from time import ...`, so expressions see the ones from module `time`
from datetime import \
    MAXYEAR, MINYEAR, date, datetime, time, timedelta, timezone, tzinfo
try:
    from datetime import UTC
except ImportError:
    pass

from functools import \
    cache, cached_property, cmp_to_key, get_cache_token, lru_cache, \
    namedtuple, partial, partialmethod, recursive_repr, reduce, \
    singledispatch, singledispatchmethod, total_ordering, update_wrapper, \
    wraps

from itertools import \
    accumulate, chain, combinations, combinations_with_replacement, \
    compress, count, cycle, dropwhile, filterfalse, groupby, islice, \
    permutations, product, repeat, starmap, takewhile, tee, zip_longest
try:
    from itertools import pairwise
except ImportError:
    pass

from json import dump, dumps, load, loads

import math
from math import \
    acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
    copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
    fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
    isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
    log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
    radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
try:
    from math import cbrt, exp2
except ImportError:
    pass

from random import \
    betavariate, choice, choices, expovariate, gammavariate, gauss, \
    getrandbits, getstate, lognormvariate, normalvariate, paretovariate, \
    randbytes, randint, random, randrange, sample, seed, setstate, \
    shuffle, triangular, uniform, vonmisesvariate, weibullvariate

from re import compile as compile_uncached, Pattern

from statistics import \
    bisect_left, bisect_right, fmean, \
    geometric_mean, harmonic_mean, mean, median, \
    median_grouped, median_high, median_low, mode, multimode, pstdev, \
    pvariance, quantiles, stdev, variance
try:
    from statistics import \
        correlation, covariance, linear_regression, mul, reduce
except ImportError:
    pass

from string import \
    Formatter, Template, ascii_letters, ascii_lowercase, ascii_uppercase, \
    capwords, digits, hexdigits, octdigits, printable, punctuation, \
    whitespace

from sys import argv, exit, stderr, stdin, stdout

from textwrap import dedent, fill, indent, shorten, wrap

from time import \
    altzone, asctime, \
    ctime, daylight, get_clock_info, \
    gmtime, localtime, mktime, monotonic, monotonic_ns, perf_counter, \
    perf_counter_ns, process_time, process_time_ns, \
    sleep, strftime, strptime, struct_time, thread_time, thread_time_ns, \
    time, time_ns, timezone, tzname
try:
    from time import \
        clock_getres, clock_gettime, clock_gettime_ns, clock_settime, \
        clock_settime_ns, pthread_getcpuclockid, tzset
except ImportError:
    pass

# some defined funcs exposed to formulas use type declarations
from typing import Any, Iterable, List

from unicodedata import \
    bidirectional, category, combining, decimal, decomposition, digit, \
    east_asian_width, is_normalized, lookup, mirrored, name, normalize, \
    numeric

from urllib.request import urlopen


# info is the message shown when the script isn't given any argument, or
# when the leading argument is one of the standard cmd-line help options
info = '''
tj [options...] [python expression] [filepath/URI...]

Transform Json loads JSON data, runs a Python expression on it, and emits
the result as JSON. Input data are available to the expression as variables
named `v`, `d`, and `data`.

If no file is given, it loads JSON data from its standard input.

Options, where leading double-dashes are also allowed:

    -c        compact JSON output (JSON-0), without unneeded spaces
    -compact  same as -c
    -j0       same as -c
    -json0    same as -c
    -json-0   same as -c

    -h        show this help message
    -help     same as -h

    -n        No input / load Nothing
    -nil      same as -n
    -none     same as -n
    -null     same as -n
'''.strip()


# no args or a leading help-option arg means show the help message and quit;
# the message goes to stderr, and the exit code still signals success
if len(argv) < 2 or argv[1].lower() in ('-h', '--h', '-help', '--help'):
    print(info, file=stderr)
    exit(0)


# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# paddable_tab_re detects single tabs and possible runs of spaces around
# them, and is used in func squeeze
paddable_tab_re = compile_uncached(' *\t *')

# spaces_re detects runs of spaces, and is used in func squeeze, where each
# run is replaced by a single space
spaces_re = compile_uncached(' +')


# some convenience aliases to commonly-used values
true = True
false = False
nil = None
none = None
null = None
block = '█'
cdot = '·'
colon = ':'
comma = ','
crlf = '\r\n'
dot = '.'
empty = ''
lf = '\n'
mdot = '·'
semicolon = ';'
space = ' '
tab = '\t'
# NOTE: this is a 3-character string (U+00EF, U+00BB, U+00BF), i.e. the
# UTF-8 BOM byte values written as separate characters, not '\ufeff'
utf8bom = '\xef\xbb\xbf'

# some occasionally-useful values
kb = 1024
mb = 1024 * kb
gb = 1024 * mb
tb = 1024 * gb
pb = 1024 * tb

months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

monweek = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
    'Saturday', 'Sunday',
]

sunweek = [
    'Sunday',
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
]

# some convenience aliases to various funcs from the python stdlib
geomean = geometric_mean
harmean = harmonic_mean
sd = stdev
popsd = pstdev
var = variance
popvar = pvariance
randbeta = betavariate
randexp = expovariate
randgamma = gammavariate
randlognorm = lognormvariate
randnorm = normalvariate
randweibull = weibullvariate


def dedup(v: Iterable) -> List:
    '''
    Ignore reappearing items from iterables: result is always a list.

    Items are kept in order of first appearance. Unhashable items, such as
    the lists and dicts commonly found in JSON data, are compared using a
    slower linear search, instead of causing an exception.
    '''

    seen = set()
    seen_unhashable = []
    kept = []
    for item in v:
        try:
            if item in seen:
                continue
            seen.add(item)
        except TypeError:
            # unhashable items can't go in sets
            if item in seen_unhashable:
                continue
            seen_unhashable.append(item)
        kept.append(item)
    return kept

unique = dedup

def fix(x: Any, repl: Any = None) -> Any:
    '''
    Make values JSON-compatible.

    None, booleans, integers, and strings are returned as given; NaNs and
    infinities are replaced with `repl`; dicts have their values fixed
    recursively, keeping their keys as given; all other iterables become
    lists of fixed items; anything else is turned into a string via `str`.
    The replacement value is used at all nesting levels.
    '''

    if x is None or isinstance(x, (bool, int, str)):
        return x
    if isinstance(x, float):
        # turn NaNs and Infinities into the replacement value given
        return repl if (isnan(x) or isinf(x)) else x
    if isinstance(x, dict):
        return {k: fix(item, repl) for k, item in x.items()}
    if isinstance(x, Iterable):
        # this also covers lists, tuples, sets, generators, ...
        return [fix(item, repl) for item in x]
    return str(x)

def after(s: str, *what: str) -> str:
    '''
    Keep what follows the first occurrence of each string given, applying
    them in order: a string which isn't found results in an empty string.
    '''

    for t in what:
        i = s.find(t)
        s = '' if i < 0 else s[i + len(t):]
    return s

def after_last(s: str, *what: str) -> str:
    '''
    Keep what follows the last occurrence of each string given, applying
    them in order: a string which isn't found results in an empty string.
    '''

    for t in what:
        i = s.rfind(t)
        s = '' if i < 0 else s[i + len(t):]
    return s

afterlast = after_last

def before(s: str, *what: str) -> str:
    '''
    Keep what precedes the first occurrence of each string given, applying
    them in order: a string which isn't found leaves the text unchanged.
    '''

    for t in what:
        i = s.find(t)
        s = s if i < 0 else s[:i]
    return s

def before_last(s: str, *what: str) -> str:
    '''
    Keep what precedes the last occurrence of each string given, applying
    them in order: a string which isn't found leaves the text unchanged.
    '''

    for t in what:
        i = s.rfind(t)
        s = s if i < 0 else s[:i]
    return s

beforelast = before_last

def since(s: str, *what: str) -> str:
    '''
    Like func after, except the result starts with the string matched,
    instead of right after it.
    '''

    for t in what:
        i = s.find(t)
        s = '' if i < 0 else s[i:]
    return s

def since_last(s: str, *what: str) -> str:
    '''
    Like func after_last, except the result starts with the string matched,
    instead of right after it.
    '''

    for t in what:
        i = s.rfind(t)
        s = '' if i < 0 else s[i:]
    return s

sincelast = since_last

def until(s: str, *what: str) -> str:
    '''
    Like func before, except the result ends with the string matched,
    instead of right before it.
    '''

    for t in what:
        i = s.find(t)
        s = s if i < 0 else s[:i + len(t)]
    return s

def now() -> datetime:
    '''Get the current date/time, as a (timezone-unaware) datetime value'''
    return datetime.now()

def now_dict() -> dict:
    '''Get the current date/time as a dictionary of JSON-friendly values'''

    v = datetime.now()
    return {
        'year': v.year,
        'month': v.month,
        'day': v.day,
        'hour': v.hour,
        'minute': v.minute,
        'second': v.second,
        'text': v.strftime('%Y-%m-%d %H:%M:%S %b %a'),
        'weekday': v.strftime('%A'),
    }

nowdict = now_dict

def gsub(s: str, what: str, repl: str) -> str:
    '''Replace all regex-matches with the string given'''
    return compile(what).sub(repl, s)

def base64(x):
    '''Base64-encode the UTF-8 bytes of the string form of the value given'''
    return base64bytes(str(x).encode()).decode()

def debase64(x):
    '''Decode a base64 string, giving back the decoded bytes as UTF-8 text'''
    return debase64bytes(str(x).encode()).decode()

def compile(s: str) -> Pattern:
    '''
    Cached regex `compiler`, so it's quicker to (re)use in formulas: this
    func takes the place of the same-named python built-in.
    '''

    if s in re_cache:
        return re_cache[s]
    e = compile_uncached(s)
    re_cache[s] = e
    return e

def squeeze(s: str) -> str:
    '''
    A more aggressive way to rid strings of extra spaces which,
    unlike string method strip, also squeezes inner runs of
    multiple spaces into single spaces
    '''

    s = s.strip()
    s = spaces_re.sub(' ', s)
    # also get rid of spaces right around tabs
    s = paddable_tab_re.sub('\t', s)
    return s

def float_or(s: str, default: Any = None) -> Any:
    '''Try to parse a float, using the fallback value given on failure'''

    try:
        return float(s)
    except Exception:
        return default

floator = float_or


def seems_url(s: str) -> bool:
    '''Check if a string starts with one of the URI protocols supported'''
    return s.startswith(('https://', 'http://', 'file://', 'ftp://', 'data:'))


# handle cmd-line arguments: names for the variables below are deliberately
# chosen not to rebind names imported for formulas, such as `e` from module
# `math` and `name` from module `unicodedata`
args = argv[1:]
load_input = True
compact_output = False
input_name = ''
expression = None

for arg in args:
    opt = arg.lower()
    if opt in ('-n', '--n', '-nil', '--nil', '-none', '--none',
               '-null', '--null'):
        load_input = False
    elif opt in ('-c', '--c', '-compact', '--compact', '-j0', '--j0',
                 '-json0', '--json0', '-json-0', '--json-0'):
        compact_output = True
    elif expression is None:
        # first non-option argument is the expression to evaluate
        expression = arg
    elif input_name == '':
        # second non-option argument is the input's filepath/URI
        input_name = arg
    else:
        print('\x1b[31mmultiple inputs not allowed\x1b[0m', file=stderr)
        exit(1)

try:
    # only options were given: show the help message and quit
    if expression is None:
        print(info, file=stderr)
        exit(0)

    # when not in `no-input mode`, load JSON into variable `v`
    v = None
    if load_input:
        if input_name in ('', '-'):
            stdin.reconfigure(encoding='utf-8')
            v = load(stdin)
        elif seems_url(input_name):
            with urlopen(input_name) as inp:
                v = load(inp)
        else:
            with open(input_name, encoding='utf-8') as inp:
                v = load(inp)

    # offer several aliases for main variable `v`; the intuitive
    # `in` (short for `input`) is a keyword, so it's not available
    d = v
    data = v
    value = v
    # input = v

    # auto-parse main value to floating-point, for convenience: values
    # which can't be turned into floats result in a NaN
    try:
        f = float(v)
    except Exception:
        f = nan

    # prevent formulas from opening files, and similar other actions
    # NOTE: this only rebinds a few names in this module: it isn't a
    # proper sandbox, as the expression is still run by `eval` with this
    # module's other names available to it
    exec = None
    load = None
    open = None
    stdin = None
    urlopen = None

    # transform data using the formula/expression given: handle a dot as
    # an identity operation, evaluate anything else
    v = v if expression == '.' else eval(expression)
    v = fix(v)

    # import/define only after calling eval, further minimizing names
    # available to formulas being run
    sep = (',', ':') if compact_output else (', ', ': ')
    ind = None if compact_output else 2

    # emit result as JSON
    stdout.reconfigure(newline='\n', encoding='utf-8')
    dump(v, stdout, indent=ind, separators=sep, allow_nan=False)
    stdout.write('\n')
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.flush()
    stderr.close()
except KeyboardInterrupt:
    # quit quietly, instead of showing a confusing error message
    stderr.flush()
    stderr.close()
    exit(2)
except Exception as err:
    print(f'\x1b[31m{err}\x1b[0m', file=stderr)
    exit(1)