File: tjp.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 info = '''
  27 tjp [options...] [python expression] [file/URI...]
  28 
  29 
  30 Transform Json with Python runs a python expression on a single JSON-encoded
  31 input.
  32 
  33 The expression can use either `v`, `value`, `d`, or `data` for the decoded
  34 input. Invalid-JSON inputs result in an error, with no chance of recovery.
  35 
  36 Input-sources can be either files or web-URIs. When not given a named input,
  37 the standard input is used.
  38 
  39 
  40 Examples
  41 
  42 # numbers from 0 to 5; no input is read/used
  43 tjp = 'range(6)'
  44 
  45 # using bases 1 to 5, find all their powers up to the 4th
  46 tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'
  47 
  48 # keep only the last 2 items from the input
  49 tjp = 'range(1, 6)' | tjp 'data[-2:]'
  50 
  51 # chunk/regroup input items into arrays of up to 3 items each
  52 tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'
  53 
  54 # ignore errors/exceptions, in favor of a fallback value
  55 tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'
  56 
  57 # ignore errors/exceptions, calling a fallback func with the exception
  58 tjp = 'rescue(lambda: 2 * float("no way"), str)'
  59 
  60 # use dot-syntax on JSON data
  61 tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'
  62 
  63 # use dot-syntax on JSON data; keywords as properties are syntax-errors
  64 tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'
  65 
  66 # func results are automatically called on the input
  67 tjp = '{"abc": 123, "def": 456}' | tjp len
  68 '''
  69 
  70 
  71 from itertools import islice
  72 from json import dump, load, loads
  73 compile_py = compile
  74 from math import isnan
  75 from re import compile as compile_uncached, IGNORECASE
  76 from sys import argv, exit, stderr, stdin, stdout
  77 from typing import Iterable
  78 
  79 
# with no arguments at all, show the help message on stderr and quit
if len(argv) < 2:
    print(info.strip(), file=stderr)
    exit(0)
# when the only argument is a help option, show the help message on stdout
if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info.strip())
    exit(0)
  86 
  87 
class Skip:
    'Marker type: func fix_result leaves its instances out of dicts/arrays.'
    pass


# skip is a ready-made instance of the Skip marker type
skip = Skip()
  93 
  94 
  95 class Dottable:
  96     'Enable convenient dot-syntax access to dictionary values.'
  97 
  98     def __getattr__(self, key):
  99         return self.__dict__[key] if key in self.__dict__ else None
 100 
 101     def __getitem__(self, key):
 102         return self.__dict__[key] if key in self.__dict__ else None
 103 
 104     def __iter__(self):
 105         return iter(self.__dict__)
 106 
 107 def dotate(x):
 108     'Recursively ensure all dictionaries in a value are dot-accessible.'
 109 
 110     if isinstance(x, dict):
 111         d = Dottable()
 112         d.__dict__ = {k: dotate(v) for k, v in x.items()}
 113         return d
 114     if isinstance(x, list):
 115         return [dotate(e) for e in x]
 116     if isinstance(x, tuple):
 117         return tuple(dotate(e) for e in x)
 118     return x
 119 
 120 dotated = dotate
 121 dote = dotate
 122 doted = dotate
 123 dotified = dotate
 124 dotify = dotate
 125 dottified = dotate
 126 dottify = dotate
 127 
 128 
 129 def chunk(items, chunk_size):
 130     'Break iterable into chunks, each with up to the item-count given.'
 131 
 132     if isinstance(items, str):
 133         n = len(items)
 134         while n >= chunk_size:
 135             yield items[:chunk_size]
 136             items = items[chunk_size:]
 137             n -= chunk_size
 138         if n > 0:
 139             yield items
 140         return
 141 
 142     if not isinstance(chunk_size, int):
 143         raise Exception('non-integer chunk-size')
 144     if chunk_size < 1:
 145         raise Exception('non-positive chunk-size')
 146 
 147     it = iter(items)
 148     while True:
 149         head = tuple(islice(it, chunk_size))
 150         if not head:
 151             return
 152         yield head
 153 
 154 chunked = chunk
 155 
 156 
 157 # re_cache is used by custom func compile to cache previously-compiled
 158 # regular-expressions, which makes them quicker to (re)use in formulas
 159 re_cache = {}
 160 
 161 # ire_cache is like re_cache, except it's for case-insensitive regexes
 162 ire_cache = {}
 163 
 164 
 165 def compile(expr, flags = 0):
 166     'Speed-up using regexes across lines, by avoiding recompilations.'
 167 
 168     if flags != 0 and flags != IGNORECASE:
 169         msg = 'only the default and case-insensitive options are supported'
 170         raise Exception(msg)
 171 
 172     cache = re_cache if flags == 0 else ire_cache
 173     if expr in cache:
 174         return cache[expr]
 175 
 176     pat = compile_uncached(expr, flags)
 177     cache[expr] = pat
 178     return pat
 179 
 180 
def icompile(expr):
    'Like func compile, except the regex is always case-insensitive.'
    return compile(expr, IGNORECASE)
 183 
 184 
 185 def cond(*args):
 186     if len(args) == 0:
 187         return None
 188 
 189     for i, e in enumerate(args):
 190         if i % 2 == 0 and i < len(args) - 1 and e:
 191             return args[i + 1]
 192 
 193     return args[-1] if len(args) % 2 == 1 else None
 194 
 195 
 196 def dive(into, using):
 197     'Depth-first recursive caller for 1-input functions.'
 198 
 199     if callable(into):
 200         into, using = using, into
 201 
 202     def rec(v):
 203         if isinstance(v, dict):
 204             return {k: rec(v) for k, v in v.items()}
 205         if isinstance(v, Iterable) and not isinstance(v, str):
 206             return [rec(v) for v in v]
 207         return using(v)
 208 
 209     return rec(into)
 210 
 211 
 212 def divekeys(into, using):
 213     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 214 
 215     if callable(into):
 216         into, using = using, into
 217 
 218     def rec(v):
 219         if isinstance(v, dict):
 220             return {using(k): rec(v) for k, v in v.items()}
 221         if isinstance(v, Iterable) and not isinstance(v, str):
 222             return [rec(v) for i, v in enumerate(v)]
 223         return v
 224 
 225     return rec(None, into)
 226 
 227 
 228 def divekv(into, using, using2 = None):
 229     'Depth-first recursive caller for 2-input functions.'
 230 
 231     if using2 is None:
 232         if callable(into):
 233             into, using = using, into
 234     else:
 235         if not callable(using2):
 236             into, using, using2 = using2, into, using
 237 
 238     def rec(k, v):
 239         if isinstance(v, dict):
 240             return {k: rec(k, v) for k, v in v.items()}
 241         if isinstance(v, Iterable) and not isinstance(v, str):
 242             return [rec(i, v) for i, v in enumerate(v)]
 243         return using(k, v)
 244 
 245     def rec2(k, v):
 246         if isinstance(v, dict):
 247             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 248         if isinstance(v, Iterable) and not isinstance(v, str):
 249             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 250             return [rec2(i, v) for i, v in enumerate(v)]
 251         return using2(k, v)
 252 
 253     return rec(None, into) if using2 is None else rec2(None, into)
 254 
 255 kvdive = divekv
 256 
 257 
 258 def drop(src, *what):
 259     if isinstance(src, str):
 260         for s in what:
 261             src = src.replace(s, '')
 262         return src
 263 
 264     def kdrop(src, what):
 265         return {k: v for (k, v) in src.items() if not (k in what)}
 266 
 267     if isinstance(src, dict):
 268         return kdrop(src, set(what))
 269 
 270     if isinstance(src, Iterable):
 271         what = set(what)
 272         return [kdrop(e, what) for e in src if isinstance(e, dict)]
 273 
 274     return None
 275 
 276 dropped = drop
 277 
 278 
 279 def join(x, y = ' '):
 280     'Join values into a string, or make a dict from keys and values.'
 281 
 282     if isinstance(x, str):
 283         return x.join(str(v) for v in y)
 284     if isinstance(y, str):
 285         return y.join(str(v) for v in x)
 286     return {k: v for k, v in zip(x, y)}
 287 
 288 
 289 def pick(src, *keys):
 290     if isinstance(src, dict):
 291         return {k: src.get(k, None) for k in keys}
 292     return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]
 293 
 294 picked = pick
 295 
 296 
 297 def rescue(attempt, fallback = None):
 298     try:
 299         return attempt()
 300     except Exception as e:
 301         if callable(fallback):
 302             return fallback(e)
 303         return fallback
 304 
 305 catch = rescue
 306 catched = rescue
 307 caught = rescue
 308 recover = rescue
 309 recovered = rescue
 310 rescued = rescue
 311 
 312 
 313 def retype(x):
 314     'Try to narrow the type of the value given.'
 315 
 316     if isinstance(x, float):
 317         n = int(x)
 318         return n if float(n) == x else x
 319 
 320     if not isinstance(x, str):
 321         return x
 322 
 323     try:
 324         return loads(x)
 325     except Exception:
 326         pass
 327 
 328     try:
 329         return int(x)
 330     except Exception:
 331         pass
 332 
 333     try:
 334         return float(x)
 335     except Exception:
 336         pass
 337 
 338     return x
 339 
 340 autocast = retype
 341 autocasted = retype
 342 mold = retype
 343 molded = retype
 344 recast = retype
 345 recasted = retype
 346 remold = retype
 347 remolded = retype
 348 retyped = retype
 349 
 350 
 351 def typeof(x):
 352     # return str(type(x))
 353     return {
 354         type(None): 'null',
 355         bool: 'boolean',
 356         dict: 'object',
 357         float: 'number',
 358         int: 'number',
 359         str: 'string',
 360         list: 'array',
 361         tuple: 'array',
 362     }.get(type(x), 'other')
 363 
 364 jstype = typeof
 365 
 366 
 367 def result_needs_fixing(x):
 368     if isinstance(x, float):
 369         return not isnan(x)
 370     if x is None or isinstance(x, (bool, int, float, str)):
 371         return False
 372     rec = result_needs_fixing
 373     if isinstance(x, dict):
 374         return any(rec(k) or rec(v) for k, v in x.items())
 375     if isinstance(x, (list, tuple)):
 376         return any(rec(e) for e in x)
 377     return True
 378 
 379 
 380 def fix_result(x, default):
 381     if x is type:
 382         return type(default).__name__
 383 
 384     # if expression results in a func, auto-call it with the original data
 385     if callable(x):
 386         x = x(default)
 387 
 388     if isinstance(x, float) and isnan(x):
 389         return None
 390 
 391     if x is None or isinstance(x, (bool, int, float, str)):
 392         return x
 393 
 394     rec = fix_result
 395 
 396     if isinstance(x, dict):
 397         return {
 398             rec(k, default): rec(v, default) for k, v in x.items() if not
 399                 (isinstance(k, Skip) or isinstance(v, Skip))
 400         }
 401 
 402     if isinstance(x, Iterable):
 403         return tuple(rec(e, default) for e in x if not isinstance(e, Skip))
 404 
 405     if isinstance(x, Dottable):
 406         return rec(x.__dict__, default)
 407 
 408     if isinstance(x, Exception):
 409         raise x
 410 
 411     return None if isinstance(x, Skip) else str(x)
 412 
 413 
def fail(msg, code = 1):
    'Show a message on stderr in red (ANSI-styled), then quit with a code.'
    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)
 417 
 418 
def message(msg, result = None):
    'Show a message on stderr, then return the (optional) result given.'
    print(msg, file=stderr)
    return result

# msg is a shorter name for func message
msg = message
 424 
 425 
 426 def seemsurl(path):
 427     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 428     return any(path.startswith(p) for p in protocols)
 429 
 430 
 431 def matchkey(kv, key):
 432     if key in kv:
 433         return key
 434 
 435     low = key.lower()
 436     for k in kv.keys():
 437         if low == k.lower():
 438             return k
 439 
 440     try:
 441         i = int(key)
 442         l = len(kv)
 443         if i < 0:
 444             i += l
 445 
 446         if not (-l <= i < l):
 447             return key
 448 
 449         for j, k in enumerate(kv.keys()):
 450             if i == j:
 451                 return k
 452     except Exception:
 453         return key
 454 
 455     return key
 456 
 457 
 458 def zoom(data, keys):
 459     for i, k in enumerate(keys):
 460         if isinstance(data, dict):
 461             # m = matchkey(data, k)
 462             # if not (m in data):
 463             #     raise Exception(f'{m}: object doesn\'t have that key')
 464             data = data.get(matchkey(data, k), None)
 465             continue
 466 
 467         if isinstance(data, (list, tuple)):
 468             if k == '+':
 469                 pick = keys[i + 1:]
 470                 return [{k: e.get(k, None) for k in pick}
 471                         for e in data if isinstance(e, dict)]
 472             if k == '-':
 473                 avoid = set(keys[i + 1:])
 474                 return [{k: v for (k, v) in e.items() if not (k in avoid)}
 475                         for e in data if isinstance(e, dict)]
 476             if k == '.':
 477                 rest = keys[i + 1:]
 478                 return [zoom(e, rest) for e in data]
 479 
 480             try:
 481                 k = int(k)
 482                 l = len(data)
 483                 data = data[k] if -l <= k < l else None
 484             except Exception:
 485                 # raise Exception(f'{k}: arrays don\'t have keys like objects')
 486                 data = None
 487             continue
 488 
 489         # return None
 490         # data = None
 491         raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')
 492 
 493     return data
 494 
 495 
def make_eval_once(run):
    'Wrap a func so that calling it first sets the global name eval to None.'

    def eval_once(expr):
        global eval
        # unset the global name before running the func given; presumably
        # this keeps the expression being run from using eval itself
        eval = None
        return run(expr)
    return eval_once

# from now on, the global name eval refers to the wrapper around the built-in
# func, until its first call sets that name to None
eval = make_eval_once(eval)
 504 
 505 
# names for strings/symbols, which being globals are available to
# expressions; presumably these are handy when quoting on the command-line
# is awkward
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

# alternative names for None
nil = None
none = None
null = None
 522 
 523 
# each of these groups has all the ways to spell the same leading option

# options to avoid reading any input: the data will be None
no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
# options to emit the result as single-line JSON, without extra spaces
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
# options to enable dot-syntax on the input data
# NOTE(review): unlike the other groups, there's no single-dash `-d` here;
# presumably that's because `-d` is itself a valid expression - confirm
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
# options to import modules, whose names come as the next argument
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
# options to let exceptions show their full tracebacks
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
# options to zoom into data from stdin, using all later arguments as keys
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')

# args starts as all command-line arguments: leading options are removed
# from it as they're handled
args = argv[1:]
no_input = False
zoom_stdin = False
trace_errors = False
dottable_input = False
compact_output = False

# handle all leading options, stopping at the first non-option argument
while len(args) > 0:
    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # each module imported becomes a global variable with the same
            # name as the module
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # these names stay defined as globals, but without their values
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            fail(e, 1)

        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    if args[0] in zoom_opts:
        zoom_stdin = True
        args = args[1:]
        # all arguments left are keys for func zoom, so stop looking for
        # more options
        break

    break
 591 
 592 
 593 try:
 594     if zoom_stdin:
 595         data = load(stdin)
 596         data = zoom(data, args)
 597     else:
 598         expr = 'data'
 599         if len(args) > 0:
 600             expr = args[0]
 601             args = args[1:]
 602 
 603         if expr == '.':
 604             expr = 'data'
 605         expr = compile_py(expr, expr, mode='eval')
 606 
 607         if len(args) > 1:
 608             raise Exception('can\'t use more than 1 input')
 609         path = '-' if len(args) == 0 else args[0]
 610 
 611         if no_input:
 612             data = None
 613         elif path == '-':
 614             data = load(stdin)
 615         elif seemsurl(path):
 616             from io import TextIOWrapper
 617             from urllib.request import urlopen
 618             with urlopen(path) as inp:
 619                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 620                     data = load(txt)
 621         else:
 622             with open(path, encoding='utf-8') as inp:
 623                 data = load(inp)
 624 
 625         if dottable_input:
 626             data = dotate(data)
 627 
 628         v = val = value = d = dat = data
 629         compile_py = None
 630         exec = None
 631         open = None
 632         data = eval(expr)
 633 
 634     if result_needs_fixing(data):
 635         data = fix_result(data, data)
 636 
 637     v = data
 638     if compact_output:
 639         dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
 640     else:
 641         dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
 642     print()
 643 except BrokenPipeError:
 644     # quit quietly, instead of showing a confusing error message
 645     stderr.close()
 646     exit(0)
 647 except KeyboardInterrupt:
 648     exit(2)
 649 except Exception as e:
 650     if trace_errors:
 651         raise e
 652     else:
 653         fail(e, 1)