File: tjp.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
# info is the help message: it's shown when the script is run without any
# arguments, or when its only argument is one of the help options
info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len
'''
  69 
  70 
  71 from itertools import islice
  72 from json import dump, load, loads
  73 compile_py = compile
  74 from math import isnan
  75 from re import compile as compile_uncached, IGNORECASE
  76 from sys import argv, exit, stderr, stdin, stdout
  77 from typing import Iterable
  78 
  79 
# without any arguments, show the help message on stderr, then quit
if len(argv) < 2:
    print(info.strip(), file=stderr)
    exit(0)
# when the only argument is a help option, show the help message on stdout
if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info.strip())
    exit(0)
  86 
  87 
  88 class Skip:
  89     pass
  90 
  91 
  92 skip = Skip()
  93 
  94 
  95 class Dottable:
  96     'Enable convenient dot-syntax access to dictionary values.'
  97 
  98     def __getattr__(self, key):
  99         return self.__dict__[key] if key in self.__dict__ else None
 100 
 101     def __getitem__(self, key):
 102         return self.__dict__[key] if key in self.__dict__ else None
 103 
 104     def __iter__(self):
 105         return iter(self.__dict__)
 106 
 107 def dotate(x):
 108     'Recursively ensure all dictionaries in a value are dot-accessible.'
 109 
 110     if isinstance(x, dict):
 111         d = Dottable()
 112         d.__dict__ = {k: dotate(v) for k, v in x.items()}
 113         return d
 114     if isinstance(x, list):
 115         return [dotate(e) for e in x]
 116     if isinstance(x, tuple):
 117         return tuple(dotate(e) for e in x)
 118     return x
 119 
 120 dotated = dotate
 121 dote = dotate
 122 doted = dotate
 123 dotified = dotate
 124 dotify = dotate
 125 dottified = dotate
 126 dottify = dotate
 127 
 128 
 129 def chunk(items, chunk_size):
 130     'Break iterable into chunks, each with up to the item-count given.'
 131 
 132     if isinstance(items, str):
 133         n = len(items)
 134         while n >= chunk_size:
 135             yield items[:chunk_size]
 136             items = items[chunk_size:]
 137             n -= chunk_size
 138         if n > 0:
 139             yield items
 140         return
 141 
 142     if not isinstance(chunk_size, int):
 143         raise Exception('non-integer chunk-size')
 144     if chunk_size < 1:
 145         raise Exception('non-positive chunk-size')
 146 
 147     it = iter(items)
 148     while True:
 149         head = tuple(islice(it, chunk_size))
 150         if not head:
 151             return
 152         yield head
 153 
 154 chunked = chunk
 155 
 156 
 157 # re_cache is used by custom func compile to cache previously-compiled
 158 # regular-expressions, which makes them quicker to (re)use in formulas
 159 re_cache = {}
 160 
 161 # ire_cache is like re_cache, except it's for case-insensitive regexes
 162 ire_cache = {}
 163 
 164 
 165 def compile(expr, flags = 0):
 166     'Speed-up using regexes across lines, by avoiding recompilations.'
 167 
 168     if flags != 0 and flags != IGNORECASE:
 169         msg = 'only the default and case-insensitive options are supported'
 170         raise Exception(msg)
 171 
 172     cache = re_cache if flags == 0 else ire_cache
 173     if expr in cache:
 174         return cache[expr]
 175 
 176     pat = compile_uncached(expr, flags)
 177     cache[expr] = pat
 178     return pat
 179 
 180 
 181 def icompile(expr):
 182     return compile(expr, IGNORECASE)
 183 
 184 
 185 def cond(*args):
 186     if len(args) == 0:
 187         return None
 188 
 189     for i, e in enumerate(args):
 190         if i % 2 == 0 and i < len(args) - 1 and e:
 191             return args[i + 1]
 192 
 193     return args[-1] if len(args) % 2 == 1 else None
 194 
 195 
 196 def dive(into, using):
 197     'Depth-first recursive caller for 1-input functions.'
 198 
 199     if callable(into):
 200         into, using = using, into
 201 
 202     def rec(v):
 203         if isinstance(v, dict):
 204             return {k: rec(v) for k, v in v.items()}
 205         if isinstance(v, Iterable) and not isinstance(v, str):
 206             return [rec(v) for v in v]
 207         return using(v)
 208 
 209     return rec(into)
 210 
 211 
 212 def divekeys(into, using):
 213     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 214 
 215     if callable(into):
 216         into, using = using, into
 217 
 218     def rec(v):
 219         if isinstance(v, dict):
 220             return {using(k): rec(v) for k, v in v.items()}
 221         if isinstance(v, Iterable) and not isinstance(v, str):
 222             return [rec(v) for i, v in enumerate(v)]
 223         return v
 224 
 225     return rec(None, into)
 226 
 227 
 228 def divekv(into, using, using2 = None):
 229     'Depth-first recursive caller for 2-input functions.'
 230 
 231     if using2 is None:
 232         if callable(into):
 233             into, using = using, into
 234     else:
 235         if not callable(using2):
 236             into, using, using2 = using2, into, using
 237 
 238     def rec(k, v):
 239         if isinstance(v, dict):
 240             return {k: rec(k, v) for k, v in v.items()}
 241         if isinstance(v, Iterable) and not isinstance(v, str):
 242             return [rec(i, v) for i, v in enumerate(v)]
 243         return using(k, v)
 244 
 245     def rec2(k, v):
 246         if isinstance(v, dict):
 247             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 248         if isinstance(v, Iterable) and not isinstance(v, str):
 249             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 250             return [rec2(i, v) for i, v in enumerate(v)]
 251         return using2(k, v)
 252 
 253     return rec(None, into) if using2 is None else rec2(None, into)
 254 
 255 kvdive = divekv
 256 
 257 
 258 def drop(src, *what):
 259     if isinstance(src, str):
 260         for s in what:
 261             src = src.replace(s, '')
 262         return src
 263 
 264     def kdrop(src, what):
 265         return {k: v for (k, v) in src.items() if not (k in what)}
 266 
 267     if isinstance(src, dict):
 268         return kdrop(src, set(what))
 269 
 270     if isinstance(src, Iterable):
 271         what = set(what)
 272         return [kdrop(e, what) for e in src if isinstance(e, dict)]
 273 
 274     return None
 275 
 276 dropped = drop
 277 
 278 
 279 def join(x, y = ' '):
 280     'Join values into a string, or make a dict from keys and values.'
 281 
 282     if isinstance(x, str):
 283         return x.join(str(v) for v in y)
 284     if isinstance(y, str):
 285         return y.join(str(v) for v in x)
 286     return {k: v for k, v in zip(x, y)}
 287 
 288 
 289 def pick(src, *keys):
 290     if isinstance(src, dict):
 291         return {k: src.get(k, None) for k in keys}
 292     return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]
 293 
 294 picked = pick
 295 
 296 
 297 def rescue(attempt, fallback = None):
 298     try:
 299         return attempt()
 300     except Exception as e:
 301         if callable(fallback):
 302             return fallback(e)
 303         return fallback
 304 
 305 catch = rescue
 306 recover = rescue
 307 rescued = rescue
 308 
 309 
 310 def retype(x):
 311     'Try to narrow the type of the value given.'
 312 
 313     if isinstance(x, float):
 314         n = int(x)
 315         return n if float(n) == x else x
 316 
 317     if not isinstance(x, str):
 318         return x
 319 
 320     try:
 321         return loads(x)
 322     except Exception:
 323         pass
 324 
 325     try:
 326         return int(x)
 327     except Exception:
 328         pass
 329 
 330     try:
 331         return float(x)
 332     except Exception:
 333         pass
 334 
 335     return x
 336 
 337 autocast = retype
 338 autocasted = retype
 339 mold = retype
 340 molded = retype
 341 recast = retype
 342 recasted = retype
 343 remold = retype
 344 remolded = retype
 345 retyped = retype
 346 
 347 
 348 def typeof(x):
 349     # return str(type(x))
 350     return {
 351         type(None): 'null',
 352         bool: 'boolean',
 353         dict: 'object',
 354         float: 'number',
 355         int: 'number',
 356         str: 'string',
 357         list: 'array',
 358         tuple: 'array',
 359     }.get(type(x), 'other')
 360 
 361 jstype = typeof
 362 
 363 
 364 def result_needs_fixing(x):
 365     if isinstance(x, float):
 366         return not isnan(x)
 367     if x is None or isinstance(x, (bool, int, float, str)):
 368         return False
 369     rec = result_needs_fixing
 370     if isinstance(x, dict):
 371         return any(rec(k) or rec(v) for k, v in x.items())
 372     if isinstance(x, (list, tuple)):
 373         return any(rec(e) for e in x)
 374     return True
 375 
 376 
 377 def fix_result(x, default):
 378     if x is type:
 379         return type(default).__name__
 380 
 381     # if expression results in a func, auto-call it with the original data
 382     if callable(x):
 383         x = x(default)
 384 
 385     if isinstance(x, float) and isnan(x):
 386         return None
 387 
 388     if x is None or isinstance(x, (bool, int, float, str)):
 389         return x
 390 
 391     rec = fix_result
 392 
 393     if isinstance(x, dict):
 394         return {
 395             rec(k, default): rec(v, default) for k, v in x.items() if not
 396                 (isinstance(k, Skip) or isinstance(v, Skip))
 397         }
 398 
 399     if isinstance(x, Iterable):
 400         return tuple(rec(e, default) for e in x if not isinstance(e, Skip))
 401 
 402     if isinstance(x, Dottable):
 403         return rec(x.__dict__, default)
 404 
 405     if isinstance(x, Exception):
 406         raise x
 407 
 408     return None if isinstance(x, Skip) else str(x)
 409 
 410 
 411 def fail(msg, code = 1):
 412     print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
 413     exit(code)
 414 
 415 
 416 def message(msg, result = None):
 417     print(msg, file=stderr)
 418     return result
 419 
 420 msg = message
 421 
 422 
 423 def seemsurl(path):
 424     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 425     return any(path.startswith(p) for p in protocols)
 426 
 427 
 428 def matchkey(kv, key):
 429     if key in kv:
 430         return key
 431 
 432     low = key.lower()
 433     for k in kv.keys():
 434         if low == k.lower():
 435             return k
 436 
 437     try:
 438         i = int(key)
 439         l = len(kv)
 440         if i < 0:
 441             i += l
 442 
 443         if not (-l <= i < l):
 444             return key
 445 
 446         for j, k in enumerate(kv.keys()):
 447             if i == j:
 448                 return k
 449     except Exception:
 450         return key
 451 
 452     return key
 453 
 454 
 455 def zoom(data, keys):
 456     for i, k in enumerate(keys):
 457         if isinstance(data, dict):
 458             # m = matchkey(data, k)
 459             # if not (m in data):
 460             #     raise Exception(f'{m}: object doesn\'t have that key')
 461             data = data.get(matchkey(data, k), None)
 462             continue
 463 
 464         if isinstance(data, (list, tuple)):
 465             if k == '+':
 466                 pick = keys[i + 1:]
 467                 return [{k: e.get(k, None) for k in pick}
 468                         for e in data if isinstance(e, dict)]
 469             if k == '-':
 470                 avoid = set(keys[i + 1:])
 471                 return [{k: v for (k, v) in e.items() if not (k in avoid)}
 472                         for e in data if isinstance(e, dict)]
 473             if k == '.':
 474                 rest = keys[i + 1:]
 475                 return [zoom(e, rest) for e in data]
 476 
 477             try:
 478                 k = int(k)
 479                 l = len(data)
 480                 data = data[k] if -l <= k < l else None
 481             except Exception:
 482                 # raise Exception(f'{k}: arrays don\'t have keys like objects')
 483                 data = None
 484             continue
 485 
 486         # return None
 487         # data = None
 488         raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')
 489 
 490     return data
 491 
 492 
def make_eval_once(run):
    'Wrap an evaluator func, so the name `eval` is unset when it first runs.'

    def eval_once(expr):
        # rebind the module-level name `eval` to None before running the
        # expression: from then on, looking up that global name, even from
        # the expression being run, only finds None
        # NOTE(review): `run` is called without explicit namespaces; if it's
        # the builtin eval, the expression is evaluated using this func's
        # frame, where names `run` and `expr` are visible — confirm whether
        # that's intended
        global eval
        eval = None
        return run(expr)
    return eval_once

# from here on, the module-level name `eval` refers to the one-shot wrapper
# made around whatever that name referred to until now
eval = make_eval_once(eval)
 501 
 502 
# names for common strings; presumably meant as shortcuts for expressions,
# which are evaluated where module-level names are visible
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

# alternative names for None, including the one JSON uses
nil = None
none = None
null = None
 519 
 520 
# each tuple has all the alternative spellings for one of the options

# options to avoid reading any input: the expression gets None as its data
no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
# options to emit single-line JSON, without any extra spaces
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
# options to make the input's dictionaries dot-accessible
# NOTE(review): unlike the other groups, there's no single-dash `-d` here;
# presumably because `-d` is itself a valid expression, which negates the
# input — confirm before adding it
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
# options to import modules, which are named by the argument following them
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
# options to show full tracebacks on errors, instead of just their messages
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
# options to follow keys into the JSON read from stdin, instead of running
# a python expression
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')

# args are the arguments still to be handled; the other names are settings
# which leading options can change
args = argv[1:]
no_input = False
zoom_stdin = False
trace_errors = False
dottable_input = False
compact_output = False

# handle all leading options, stopping at the first argument which isn't one
while len(args) > 0:
    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # imported modules become module-level names, using the names
            # exactly as given in the comma-separated list
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # unset the helper names, now that they're no longer needed
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            fail(e, 1)

        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    if args[0] in zoom_opts:
        # all arguments after a zoom option are keys, not options
        zoom_stdin = True
        args = args[1:]
        break

    break
 588 
 589 
 590 try:
 591     if zoom_stdin:
 592         data = load(stdin)
 593         data = zoom(data, args)
 594     else:
 595         expr = 'data'
 596         if len(args) > 0:
 597             expr = args[0]
 598             args = args[1:]
 599 
 600         if expr == '.':
 601             expr = 'data'
 602         expr = compile_py(expr, expr, mode='eval')
 603 
 604         if len(args) > 1:
 605             raise Exception('can\'t use more than 1 input')
 606         path = '-' if len(args) == 0 else args[0]
 607 
 608         if no_input:
 609             data = None
 610         elif path == '-':
 611             data = load(stdin)
 612         elif seemsurl(path):
 613             from io import TextIOWrapper
 614             from urllib.request import urlopen
 615             with urlopen(path) as inp:
 616                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 617                     data = load(txt)
 618         else:
 619             with open(path, encoding='utf-8') as inp:
 620                 data = load(inp)
 621 
 622         if dottable_input:
 623             data = dotate(data)
 624 
 625         v = val = value = d = dat = data
 626         compile_py = None
 627         exec = None
 628         open = None
 629         data = eval(expr)
 630 
 631     if result_needs_fixing(data):
 632         data = fix_result(data, data)
 633 
 634     v = data
 635     if compact_output:
 636         dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
 637     else:
 638         dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
 639     print()
 640 except BrokenPipeError:
 641     # quit quietly, instead of showing a confusing error message
 642     stderr.close()
 643     exit(0)
 644 except KeyboardInterrupt:
 645     exit(2)
 646 except Exception as e:
 647     if trace_errors:
 648         raise e
 649     else:
 650         fail(e, 1)