File: tjp.py
   1 #!/usr/bin/python
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright (c) 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
# info is the help message: it's shown (stripped of its leading/trailing
# whitespace) when the script is run without arguments, or when its first
# argument is one of the help options
info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'

# try to auto-parse values (esp. numbers) from a table of string values
echo '[{"key": "abc", "val": "123"}, {"key": "xyz", "val": "no"}]' | \\
    tjp '[{k: rescue(lambda: loads(v), v) for k, v in e.items()} for e in v]'
'''
  76 
  77 
  78 from itertools import islice
  79 from json import dump, load, loads
  80 from math import isnan
  81 from re import compile as compile_uncached, IGNORECASE
  82 from sys import argv, exit, stderr, stdin, stdout
  83 from typing import Iterable
  84 
  85 
  86 if len(argv) < 2:
  87     print(info.strip(), file=stderr)
  88     exit(0)
  89 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  90     print(info.strip())
  91     exit(0)
  92 
  93 
  94 class Skip:
  95     pass
  96 
  97 skip = Skip()
  98 
  99 class Dottable:
 100     'Enable convenient dot-syntax access to dictionary values.'
 101 
 102     def __getattr__(self, key):
 103         return self.__dict__[key] if key in self.__dict__ else None
 104 
 105     def __getitem__(self, key):
 106         return self.__dict__[key] if key in self.__dict__ else None
 107 
 108     def __iter__(self):
 109         return iter(self.__dict__)
 110 
 111 def dotate(x):
 112     'Recursively ensure all dictionaries in a value are dot-accessible.'
 113 
 114     if isinstance(x, dict):
 115         d = Dottable()
 116         d.__dict__ = {k: dotate(v) for k, v in x.items()}
 117         return d
 118     if isinstance(x, list):
 119         return [dotate(e) for e in x]
 120     if isinstance(x, tuple):
 121         return tuple(dotate(e) for e in x)
 122     return x
 123 
 124 dotated = dote = doted = dotified = dotify = dottified = dottify = dotate
 125 
 126 def chunk(items, chunk_size):
 127     'Break iterable into chunks, each with up to the item-count given.'
 128 
 129     if isinstance(items, str):
 130         n = len(items)
 131         while n >= chunk_size:
 132             yield items[:chunk_size]
 133             items = items[chunk_size:]
 134             n -= chunk_size
 135         if n > 0:
 136             yield items
 137         return
 138 
 139     if not isinstance(chunk_size, int):
 140         raise Exception('non-integer chunk-size')
 141     if chunk_size < 1:
 142         raise Exception('non-positive chunk-size')
 143 
 144     it = iter(items)
 145     while True:
 146         head = tuple(islice(it, chunk_size))
 147         if not head:
 148             return
 149         yield head
 150 
 151 chunked = chunk
 152 
 153 # re_cache is used by custom func compile to cache previously-compiled
 154 # regular-expressions, which makes them quicker to (re)use in formulas
 155 re_cache = {}
 156 
 157 def re_compile(expr, flags = 0):
 158     'Speed-up using regexes, by avoiding recompilations.'
 159 
 160     if flags in re_cache:
 161         cache = re_cache[flags]
 162     else:
 163         cache = {}
 164         re_cache[flags] = cache
 165     if expr in cache:
 166         return cache[expr]
 167 
 168     pat = compile_uncached(expr, flags)
 169     cache[expr] = pat
 170     return pat
 171 
 172 def icompile(expr):
 173     return re_compile(expr, IGNORECASE)
 174 
 175 def cond(*args):
 176     if len(args) == 0:
 177         return None
 178 
 179     for i, e in enumerate(args):
 180         if i % 2 == 0 and i < len(args) - 1 and e:
 181             return args[i + 1]
 182 
 183     return args[-1] if len(args) % 2 == 1 else None
 184 
 185 def dive(into, using):
 186     'Depth-first recursive caller for 1-input functions.'
 187 
 188     if callable(into):
 189         into, using = using, into
 190 
 191     def rec(v):
 192         if isinstance(v, dict):
 193             return {k: rec(v) for k, v in v.items()}
 194         if isinstance(v, Iterable) and not isinstance(v, str):
 195             return [rec(v) for v in v]
 196         return using(v)
 197 
 198     return rec(into)
 199 
 200 def divekeys(into, using):
 201     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 202 
 203     if callable(into):
 204         into, using = using, into
 205 
 206     def rec(v):
 207         if isinstance(v, dict):
 208             return {using(k): rec(v) for k, v in v.items()}
 209         if isinstance(v, Iterable) and not isinstance(v, str):
 210             return [rec(v) for i, v in enumerate(v)]
 211         return v
 212 
 213     return rec(None, into)
 214 
 215 def divekv(into, using, using2 = None):
 216     'Depth-first recursive caller for 2-input functions.'
 217 
 218     if using2 is None:
 219         if callable(into):
 220             into, using = using, into
 221     else:
 222         if not callable(using2):
 223             into, using, using2 = using2, into, using
 224 
 225     def rec(k, v):
 226         if isinstance(v, dict):
 227             return {k: rec(k, v) for k, v in v.items()}
 228         if isinstance(v, Iterable) and not isinstance(v, str):
 229             return [rec(i, v) for i, v in enumerate(v)]
 230         return using(k, v)
 231 
 232     def rec2(k, v):
 233         if isinstance(v, dict):
 234             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 235         if isinstance(v, Iterable) and not isinstance(v, str):
 236             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 237             return [rec2(i, v) for i, v in enumerate(v)]
 238         return using2(k, v)
 239 
 240     return rec(None, into) if using2 is None else rec2(None, into)
 241 
 242 kvdive = divekv
 243 
 244 def drop(src, *what):
 245     if isinstance(src, str):
 246         for s in what:
 247             src = src.replace(s, '')
 248         return src
 249 
 250     def kdrop(src, what):
 251         return {k: v for (k, v) in src.items() if not (k in what)}
 252 
 253     if isinstance(src, dict):
 254         return kdrop(src, set(what))
 255 
 256     if isinstance(src, Iterable):
 257         what = set(what)
 258         return [kdrop(e, what) for e in src if isinstance(e, dict)]
 259 
 260     return None
 261 
 262 dropped = drop
 263 
 264 def join(x, y = ' '):
 265     'Join values into a string, or make a dict from keys and values.'
 266 
 267     if isinstance(x, str):
 268         return x.join(str(v) for v in y)
 269     if isinstance(y, str):
 270         return y.join(str(v) for v in x)
 271     return {k: v for k, v in zip(x, y)}
 272 
 273 def maybe(f, x):
 274     try:
 275         return f(x)
 276     except Exception as _:
 277         return x
 278 
 279 def number(x):
 280     try:
 281         return int(x)
 282     except Exception as _:
 283         pass
 284     try:
 285         return float(x)
 286     except Exception as _:
 287         return x
 288 
 289 def pick(src, *keys):
 290     if isinstance(src, dict):
 291         return {k: src.get(k, None) for k in keys}
 292     return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]
 293 
 294 picked = pick
 295 
 296 def rescue(attempt, fallback = None):
 297     try:
 298         return attempt()
 299     except BrokenPipeError as e:
 300         raise e
 301     except Exception as e:
 302         if callable(fallback):
 303             return fallback(e)
 304         return fallback
 305 
 306 rescued = rescue
 307 
 308 def retype(x):
 309     'Try to narrow the type of the value given.'
 310 
 311     if isinstance(x, float):
 312         n = int(x)
 313         return n if float(n) == x else x
 314 
 315     if not isinstance(x, str):
 316         return x
 317 
 318     try:
 319         return loads(x)
 320     except Exception:
 321         pass
 322 
 323     try:
 324         return int(x)
 325     except Exception:
 326         pass
 327 
 328     try:
 329         return float(x)
 330     except Exception:
 331         pass
 332 
 333     return x
 334 
 335 autocast = autocasted = mold = molded = recast = recasted = remold = retype
 336 remolded = retyped = retype
 337 
 338 def typeof(x):
 339     # return str(type(x))
 340     return {
 341         type(None): 'null',
 342         bool: 'boolean',
 343         dict: 'object',
 344         float: 'number',
 345         int: 'number',
 346         str: 'string',
 347         list: 'array',
 348         tuple: 'array',
 349     }.get(type(x), 'other')
 350 
 351 jstype = typeof
 352 
 353 
 354 def result_needs_fixing(x):
 355     if isinstance(x, float):
 356         return not isnan(x)
 357     if x is None or isinstance(x, (bool, int, float, str)):
 358         return False
 359     rec = result_needs_fixing
 360     if isinstance(x, dict):
 361         return any(rec(k) or rec(v) for k, v in x.items())
 362     if isinstance(x, (list, tuple)):
 363         return any(rec(e) for e in x)
 364     return True
 365 
 366 def fix_result(x, default):
 367     if x is type:
 368         return type(default).__name__
 369 
 370     # if expression results in a func, auto-call it with the original data
 371     if callable(x):
 372         x = x(default)
 373 
 374     if isinstance(x, float) and isnan(x):
 375         return None
 376 
 377     if x is None or isinstance(x, (bool, int, float, str)):
 378         return x
 379 
 380     rec = fix_result
 381 
 382     if isinstance(x, dict):
 383         return {
 384             rec(k, default): rec(v, default) for k, v in x.items() if not
 385                 (isinstance(k, Skip) or isinstance(v, Skip))
 386         }
 387 
 388     if isinstance(x, Iterable):
 389         return tuple(rec(e, default) for e in x if not isinstance(e, Skip))
 390 
 391     if isinstance(x, Dottable):
 392         return rec(x.__dict__, default)
 393 
 394     if isinstance(x, Exception):
 395         raise x
 396 
 397     return None if isinstance(x, Skip) else str(x)
 398 
 399 def fail(msg, code = 1):
 400     print(str(msg), file=stderr)
 401     exit(code)
 402 
 403 def message(msg, result = None):
 404     print(msg, file=stderr)
 405     return result
 406 
 407 msg = message
 408 
 409 def seemsurl(path):
 410     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 411     return any(path.startswith(p) for p in protocols)
 412 
 413 def matchkey(kv, key):
 414     if key in kv:
 415         return key
 416 
 417     low = key.lower()
 418     for k in kv.keys():
 419         if low == k.lower():
 420             return k
 421 
 422     try:
 423         i = int(key)
 424         l = len(kv)
 425         if i < 0:
 426             i += l
 427 
 428         if not (-l <= i < l):
 429             return key
 430 
 431         for j, k in enumerate(kv.keys()):
 432             if i == j:
 433                 return k
 434     except Exception:
 435         return key
 436 
 437     return key
 438 
 439 def zoom(data, keys):
 440     for i, k in enumerate(keys):
 441         if isinstance(data, dict):
 442             # m = matchkey(data, k)
 443             # if not (m in data):
 444             #     raise Exception(f'{m}: object doesn\'t have that key')
 445             data = data.get(matchkey(data, k), None)
 446             continue
 447 
 448         if isinstance(data, (list, tuple)):
 449             if k == '+':
 450                 pick = keys[i + 1:]
 451                 return [{k: e.get(k, None) for k in pick}
 452                         for e in data if isinstance(e, dict)]
 453             if k == '-':
 454                 avoid = set(keys[i + 1:])
 455                 return [{k: v for (k, v) in e.items() if not (k in avoid)}
 456                         for e in data if isinstance(e, dict)]
 457             if k == '.':
 458                 rest = keys[i + 1:]
 459                 return [zoom(e, rest) for e in data]
 460 
 461             try:
 462                 k = int(k)
 463                 l = len(data)
 464                 data = data[k] if -l <= k < l else None
 465             except Exception:
 466                 # raise Exception(f'{k}: arrays don\'t have keys like objects')
 467                 data = None
 468             continue
 469 
 470         # return None
 471         # data = None
 472         raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')
 473 
 474     return data
 475 
def make_eval_once(run):
    '''
    Wrap an evaluator func into a func which sets the global name `eval` to
    None right before calling the evaluator given with its argument.
    '''

    def eval_once(expr):
        # rebinding the module-level name happens before running anything,
        # so `eval` is already None while the expression is being run
        global eval
        eval = None
        return run(expr)
    return eval_once
 482 
 483 
 484 cr = '\r'
 485 crlf = '\r\n'
 486 dquo = dquote = '"'
 487 empty = ''
 488 lcurly = '{'
 489 lf = '\n'
 490 rcurly = '}'
 491 squo = squote = '\''
 492 
 493 nil = none = null = None
 494 
 495 
 496 no_input_opts = (
 497     '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
 498 )
 499 compact_output_opts = (
 500     '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
 501 )
 502 dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
 503 modules_opts = (
 504     '-m', '--m', '-mod', '--mod', '-module', '--module',
 505     '-modules', '--modules',
 506 )
 507 pipe_opts = ('-p', '--p', '-pipe', '--pipe')
 508 trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
 509 zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')
 510 
# args starts as all the command-line arguments after the script name:
# leading options are taken off it, one at a time, by the loop below
args = argv[1:]
# the settings the leading options can change
no_input = False
zoom_stdin = False
pipe_mode = False
trace_errors = False
dottable_input = False
compact_output = False

# handle leading options, stopping at the first argument which isn't one
while len(args) > 0:
    # a double-dash explicitly ends the options
    if args[0] == '--':
        args = args[1:]
        break

    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    # pipe-mode ends the options: all arguments after it are left alone
    if args[0] in pipe_opts:
        pipe_mode = True
        args = args[1:]
        break

    # the modules option also uses the argument which follows it
    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # modules become global variables, named exactly as given
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # the helper names stay defined, but are no longer usable
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            # func fail quits the script, after showing the error
            fail(e, 1)

        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    # zoom-mode ends the options: all arguments after it are left alone
    if args[0] in zoom_opts:
        zoom_stdin = True
        args = args[1:]
        break

    # anything else isn't an option
    break
 574 
 575 
 576 try:
 577     if zoom_stdin:
 578         data = load(stdin)
 579         data = zoom(data, args)
 580     else:
 581         expr = 'data'
 582         if len(args) > 0 and (not pipe_mode):
 583             expr = args[0]
 584             args = args[1:]
 585 
 586         if expr == '.':
 587             expr = 'data'
 588         if not pipe_mode:
 589             expr = compile(expr, expr, mode='eval')
 590 
 591         if (not pipe_mode) and len(args) > 1:
 592             raise Exception('can\'t use more than 1 input')
 593         path = '-' if len(args) == 0 or pipe_mode else args[0]
 594 
 595         if no_input:
 596             data = None
 597         elif path == '-':
 598             data = load(stdin)
 599         elif seemsurl(path):
 600             from io import TextIOWrapper
 601             from urllib.request import urlopen
 602             with urlopen(path) as inp:
 603                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 604                     data = load(txt)
 605         else:
 606             with open(path, encoding='utf-8') as inp:
 607                 data = load(inp)
 608 
 609         if dottable_input:
 610             data = dotate(data)
 611 
 612         v = val = value = d = dat = data
 613         exec = None
 614         open = None
 615         compile = None
 616 
 617         if pipe_mode:
 618             funcs = [eval(s) for s in args]
 619             eval = None
 620 
 621             # variable names `o` and `p` work like in the `pyp` tool, except
 622             # the pipeline steps were given as separate cmd-line arguments
 623             global o, p
 624 
 625             o = p = prev = v
 626             for f in funcs:
 627                 p = f(p)
 628                 if callable(p):
 629                     p = p(prev)
 630                 prev = p
 631             v = p
 632         else:
 633             eval = make_eval_once(eval)
 634             v = eval(expr)
 635 
 636     if result_needs_fixing(v):
 637         v = fix_result(v, value)
 638 
 639     if compact_output:
 640         dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
 641     else:
 642         dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
 643     print()
 644 except BrokenPipeError:
 645     # quit quietly, instead of showing a confusing error message
 646     stderr.close()
 647     exit(0)
 648 except KeyboardInterrupt:
 649     exit(2)
 650 except Exception as e:
 651     if trace_errors:
 652         raise e
 653     else:
 654         fail(e, 1)