File: tjp.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
# info is the help message: it's shown, stripped of its leading/trailing
# whitespace, when the script is run without arguments, or with a help option
info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -d 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -d 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len
'''
  69 
  70 
  71 from itertools import islice
  72 from json import dump, load, loads
  73 compile_py = compile
  74 from re import compile as compile_uncached, IGNORECASE
  75 from sys import argv, exit, stderr, stdin, stdout
  76 from typing import Iterable
  77 
  78 
# without any arguments there's no expression to run: show the help message
# on stderr, then quit
if len(argv) < 2:
    print(info.strip(), file=stderr)
    exit(0)
# when given just one of the help options, show the help message on stdout
# instead, then quit
if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info.strip())
    exit(0)
  85 
  86 
  87 class Skip:
  88     pass
  89 
  90 
  91 skip = Skip()
  92 
  93 
  94 class Dottable:
  95     'Enable convenient dot-syntax access to dictionary values.'
  96 
  97     def __getattr__(self, key):
  98         return self.__dict__[key] if key in self.__dict__ else None
  99 
 100     def __getitem__(self, key):
 101         return self.__dict__[key] if key in self.__dict__ else None
 102 
 103     def __iter__(self):
 104         return iter(self.__dict__)
 105 
 106 def dotate(x):
 107     'Recursively ensure all dictionaries in a value are dot-accessible.'
 108 
 109     if isinstance(x, dict):
 110         d = Dottable()
 111         d.__dict__ = {k: dotate(v) for k, v in x.items()}
 112         return d
 113     if isinstance(x, list):
 114         return [dotate(e) for e in x]
 115     if isinstance(x, tuple):
 116         return tuple(dotate(e) for e in x)
 117     return x
 118 
 119 dotated = dotate
 120 dote = dotate
 121 doted = dotate
 122 dotified = dotate
 123 dotify = dotate
 124 dottified = dotate
 125 dottify = dotate
 126 
 127 
 128 def chunk(items, chunk_size):
 129     'Break iterable into chunks, each with up to the item-count given.'
 130 
 131     if isinstance(items, str):
 132         n = len(items)
 133         while n >= chunk_size:
 134             yield items[:chunk_size]
 135             items = items[chunk_size:]
 136             n -= chunk_size
 137         if n > 0:
 138             yield items
 139         return
 140 
 141     if not isinstance(chunk_size, int):
 142         raise Exception('non-integer chunk-size')
 143     if chunk_size < 1:
 144         raise Exception('non-positive chunk-size')
 145 
 146     it = iter(items)
 147     while True:
 148         head = tuple(islice(it, chunk_size))
 149         if not head:
 150             return
 151         yield head
 152 
 153 chunked = chunk
 154 
 155 
 156 # re_cache is used by custom func compile to cache previously-compiled
 157 # regular-expressions, which makes them quicker to (re)use in formulas
 158 re_cache = {}
 159 
 160 # ire_cache is like re_cache, except it's for case-insensitive regexes
 161 ire_cache = {}
 162 
 163 
 164 def compile(expr, flags = 0):
 165     'Speed-up using regexes across lines, by avoiding recompilations.'
 166 
 167     if flags != 0 and flags != IGNORECASE:
 168         msg = 'only the default and case-insensitive options are supported'
 169         raise Exception(msg)
 170 
 171     cache = re_cache if flags == 0 else ire_cache
 172     if expr in cache:
 173         return cache[expr]
 174 
 175     pat = compile_uncached(expr, flags)
 176     cache[expr] = pat
 177     return pat
 178 
 179 
def icompile(expr):
    'Call func compile on a regular-expression, using flag IGNORECASE.'
    return compile(expr, IGNORECASE)
 182 
 183 
 184 def cond(*args):
 185     if len(args) == 0:
 186         return None
 187 
 188     for i, e in enumerate(args):
 189         if i % 2 == 0 and i < len(args) - 1 and e:
 190             return args[i + 1]
 191 
 192     return args[-1] if len(args) % 2 == 1 else None
 193 
 194 
 195 def dive(into, using):
 196     'Depth-first recursive caller for 1-input functions.'
 197 
 198     if callable(into):
 199         into, using = using, into
 200 
 201     def rec(v):
 202         if isinstance(v, dict):
 203             return {k: rec(v) for k, v in v.items()}
 204         if isinstance(v, Iterable) and not isinstance(v, str):
 205             return [rec(v) for v in v]
 206         return using(v)
 207 
 208     return rec(into)
 209 
 210 
 211 def divekeys(into, using):
 212     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 213 
 214     if callable(into):
 215         into, using = using, into
 216 
 217     def rec(v):
 218         if isinstance(v, dict):
 219             return {using(k): rec(v) for k, v in v.items()}
 220         if isinstance(v, Iterable) and not isinstance(v, str):
 221             return [rec(v) for i, v in enumerate(v)]
 222         return v
 223 
 224     return rec(None, into)
 225 
 226 
 227 def divekv(into, using, using2 = None):
 228     'Depth-first recursive caller for 2-input functions.'
 229 
 230     if using2 is None:
 231         if callable(into):
 232             into, using = using, into
 233     else:
 234         if not callable(using2):
 235             into, using, using2 = using2, into, using
 236 
 237     def rec(k, v):
 238         if isinstance(v, dict):
 239             return {k: rec(k, v) for k, v in v.items()}
 240         if isinstance(v, Iterable) and not isinstance(v, str):
 241             return [rec(i, v) for i, v in enumerate(v)]
 242         return using(k, v)
 243 
 244     def rec2(k, v):
 245         if isinstance(v, dict):
 246             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 247         if isinstance(v, Iterable) and not isinstance(v, str):
 248             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 249             return [rec2(i, v) for i, v in enumerate(v)]
 250         return using2(k, v)
 251 
 252     return rec(None, into) if using2 is None else rec2(None, into)
 253 
 254 kvdive = divekv
 255 
 256 
 257 def drop(src, *what):
 258     if isinstance(src, str):
 259         for s in what:
 260             src = src.replace(s, '')
 261         return src
 262 
 263     def kdrop(src, what):
 264         kv = {}
 265         for k, v in src.items():
 266             if not (k in what):
 267                 kv[k] = v
 268         return kv
 269 
 270     if isinstance(src, dict):
 271         return kdrop(src, set(what))
 272 
 273     if isinstance(src, Iterable):
 274         what = set(what)
 275         return [kdrop(e, what) for e in src]
 276 
 277     return None
 278 
 279 dropped = drop
 280 
 281 
 282 def join(x, y = ' '):
 283     'Join values into a string, or make a dict from keys and values.'
 284 
 285     if isinstance(x, str):
 286         return x.join(str(v) for v in y)
 287     if isinstance(y, str):
 288         return y.join(str(v) for v in x)
 289     return {k: v for k, v in zip(x, y)}
 290 
 291 
 292 def pick(src, *keys):
 293     if isinstance(src, dict):
 294         return {k: src.get(k, None) for k in keys}
 295     return [{k: e.get(k, None) for k in keys} for e in src]
 296 
 297 picked = pick
 298 
 299 
 300 def rescue(attempt, fallback = None):
 301     try:
 302         return attempt()
 303     except Exception as e:
 304         if callable(fallback):
 305             return fallback(e)
 306         return fallback
 307 
 308 catch = rescue
 309 catched = rescue
 310 caught = rescue
 311 recover = rescue
 312 recovered = rescue
 313 rescued = rescue
 314 
 315 
 316 def retype(x):
 317     'Try to narrow the type of the value given.'
 318 
 319     if isinstance(x, float):
 320         n = int(x)
 321         return n if float(n) == x else x
 322 
 323     if not isinstance(x, str):
 324         return x
 325 
 326     try:
 327         return loads(x)
 328     except Exception:
 329         pass
 330 
 331     try:
 332         return int(x)
 333     except Exception:
 334         pass
 335 
 336     try:
 337         return float(x)
 338     except Exception:
 339         pass
 340 
 341     return x
 342 
 343 autocast = retype
 344 autocasted = retype
 345 mold = retype
 346 molded = retype
 347 recast = retype
 348 recasted = retype
 349 remold = retype
 350 remolded = retype
 351 retyped = retype
 352 
 353 
 354 def typeof(x):
 355     # return str(type(x))
 356     return {
 357         type(None): 'null',
 358         bool: 'boolean',
 359         dict: 'object',
 360         float: 'number',
 361         int: 'number',
 362         str: 'string',
 363         list: 'array',
 364         tuple: 'array',
 365     }.get(type(x), 'other')
 366 
 367 jstype = typeof
 368 
 369 
 370 def result_needs_fixing(x):
 371     if x is None or isinstance(x, (bool, int, float, str)):
 372         return False
 373     rec = result_needs_fixing
 374     if isinstance(x, dict):
 375         return any(rec(k) or rec(v) for k, v in x.items())
 376     if isinstance(x, (list, tuple)):
 377         return any(rec(e) for e in x)
 378     return True
 379 
 380 
 381 def fix_result(x, default):
 382     if x is type:
 383         return type(default).__name__
 384 
 385     # if expression results in a func, auto-call it with the original data
 386     if callable(x):
 387         x = x(default)
 388 
 389     if x is None or isinstance(x, (bool, int, float, str)):
 390         return x
 391 
 392     rec = fix_result
 393 
 394     if isinstance(x, dict):
 395         return {
 396             rec(k, default): rec(v, default) for k, v in x.items() if not
 397                 (isinstance(k, Skip) or isinstance(v, Skip))
 398         }
 399 
 400     if isinstance(x, Iterable):
 401         return tuple(rec(e, default) for e in x if not isinstance(e, Skip))
 402 
 403     if isinstance(x, Dottable):
 404         return rec(x.__dict__, default)
 405 
 406     if isinstance(x, Exception):
 407         raise x
 408 
 409     return None if isinstance(x, Skip) else str(x)
 410 
 411 
 412 def fail(msg, code = 1):
 413     print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
 414     exit(code)
 415 
 416 
def message(msg, result = None):
    'Show a message on stderr, then return the (optional) result given.'
    print(msg, file=stderr)
    return result

# shorter name for func message
msg = message
 422 
 423 
 424 def seemsurl(path):
 425     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 426     return any(path.startswith(p) for p in protocols)
 427 
 428 
 429 def matchkey(kv, key):
 430     if key in kv:
 431         return key
 432 
 433     low = key.lower()
 434     for k in kv.keys():
 435         if low == k.lower():
 436             return k
 437 
 438     try:
 439         i = int(key)
 440         l = len(kv)
 441         if i < 0:
 442             i += l
 443 
 444         if not (-l <= i < l):
 445             return key
 446 
 447         for j, k in enumerate(kv.keys()):
 448             if i == j:
 449                 return k
 450     except Exception as _:
 451         return key
 452 
 453     return key
 454 
 455 
 456 def zoom(data, keys):
 457     for k in keys:
 458         if isinstance(data, dict):
 459             # m = matchkey(data, k)
 460             # if not (m in data):
 461             #     raise Exception(f'{m}: object doesn\'t have that key')
 462             data = data.get(matchkey(data, k), None)
 463             continue
 464 
 465         if isinstance(data, (list, tuple)):
 466             try:
 467                 k = int(k)
 468                 l = len(data)
 469                 data = data[k] if -l <= k < l else None
 470             except Exception as _:
 471                 # raise Exception(f'{k}: arrays don\'t have keys like objects')
 472                 data = None
 473             continue
 474 
 475         # return None
 476         # if not (data is None):
 477         #     data = None
 478         #     continue
 479         raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')
 480 
 481     return data
 482 
 483 
def make_eval_once(run):
    'Wrap a 1-input func into one which also unbinds global name `eval`.'

    def eval_once(expr):
        # set global name `eval` to None right before running the func
        # given, so that name is no longer callable from then on
        global eval
        eval = None
        return run(expr)
    return eval_once

# replace the builtin func with its wrapper, as far as this script goes
eval = make_eval_once(eval)
 492 
 493 
# string constants defined at the top level of the script; presumably these
# spare expressions from having to quote/escape such values on the
# command-line — TODO confirm
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

# alternative names for None, including the one JSON uses
nil = None
none = None
null = None
 511 
# options to avoid reading any input, the data being None in that case
no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
# options to emit the result as single-line JSON, with no extra spaces
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
# options to make the input data dot-accessible
dot_opts = ('-d', '--d', '-dot', '--dot', '-dots', '--dots')
# options which are followed by a comma-separated list of modules to import
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
# options to import a fixed set of modules from the standard library
more_modules_opts = ('-mm', '--mm', '-more', '--more')
# options to re-raise exceptions, instead of only showing their messages
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
# options to zoom on the data read from standard input
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')
 526 
# args has the command-line arguments which are yet to be handled
args = argv[1:]
# settings which the leading options can change
no_input = False
zoom_stdin = False
trace_errors = False
dottable_input = False
compact_output = False

# handle all leading options, stopping at the first argument which isn't one
while len(args) > 0:
    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # each module imported becomes a global variable, named exactly
            # as given
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # unbind the names of the helpers used to import the modules
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            fail(e, 1)

        continue

    if args[0] in more_modules_opts:
        # being at the top level, these imports result in global names
        import functools, itertools, json, math, random, statistics, string, time
        args = args[1:]
        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    if args[0] in zoom_opts:
        # the zoom option also ends the handling of options right away
        zoom_stdin = True
        args = args[1:]
        break

    break
 585 
 586 
try:
    # the expression is the first argument left after the options: when
    # there's none, or when it's a single dot, the expression is `data`
    expr = 'data'
    if len(args) > 0:
        expr = args[0]
        args = args[1:]

    if expr == '.':
        expr = 'data'
    # compile before reading any input; the source of the expression is
    # also given as its filename
    expr = compile_py(expr, expr, mode='eval')

    if len(args) > 1:
        raise Exception('can\'t use more than 1 input')
    # a dash stands for the standard input
    path = '-' if len(args) == 0 else args[0]

    if no_input:
        data = None
    elif zoom_stdin:
        # NOTE(review): by this point the first argument after the zoom
        # option was already taken (and compiled) as the expression, which
        # isn't evaluated when zooming; only the arguments left are used as
        # keys, and the check above allows at most 1 of them — confirm this
        # is what's intended
        data = load(stdin)
        data = zoom(data, args)
    elif path == '-':
        data = load(stdin)
    elif seemsurl(path):
        from io import TextIOWrapper
        from urllib.request import urlopen
        with urlopen(path) as inp:
            # decode the response bytes as UTF-8 text for the JSON parser
            with TextIOWrapper(inp, encoding='utf-8') as txt:
                data = load(txt)
    else:
        with open(path, encoding='utf-8') as inp:
            data = load(inp)

    if (not zoom_stdin) and dottable_input:
        data = dotate(data)

    # all the names the expression can use for the input data
    v = value = d = data

    if not zoom_stdin:
        # unbind these names right before running the expression
        compile_py = None
        exec = None
        open = None
        v = eval(expr)
        if result_needs_fixing(v):
            v = fix_result(v, data)

    # NaN and the infinities aren't valid JSON, so they result in errors
    if compact_output:
        dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
    else:
        dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
    # end the output with a line-feed, since func dump doesn't emit one
    print()
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
    exit(0)
except KeyboardInterrupt:
    # stderr.close()
    exit(2)
except Exception as e:
    # only show the error message, unless asked to show the full traceback
    if trace_errors:
        raise e
    else:
        fail(e, 1)