File: tjp.py
   1 #!/usr/bin/python
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
# info is the usage/help text for this script: it's shown on stderr when the
# script is run without any arguments, and on stdout when the first argument
# is one of the help options; surrounding empty lines are stripped on output
info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'

# try to auto-parse values (esp. numbers) from a table of string values
echo '[{"key": "abc", "val": "123"}, {"key": "xyz", "val": "no"}]' | \\
    tjp '[{k: rescue(lambda: loads(v), v) for k, v in e.items()} for e in v]'
'''
  76 
  77 
  78 from itertools import islice
  79 from json import dump, load, loads
  80 from math import isnan
  81 from re import compile as compile_uncached, IGNORECASE
  82 from sys import argv, exit, stderr, stdin, stdout
  83 from typing import Iterable
  84 
  85 
  86 if len(argv) < 2:
  87     print(info.strip(), file=stderr)
  88     exit(0)
  89 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  90     print(info.strip())
  91     exit(0)
  92 
  93 
  94 class Skip:
  95     pass
  96 
  97 skip = Skip()
  98 
  99 class Dottable:
 100     'Enable convenient dot-syntax access to dictionary values.'
 101 
 102     def __getattr__(self, key):
 103         return self.__dict__[key] if key in self.__dict__ else None
 104 
 105     def __getitem__(self, key):
 106         return self.__dict__[key] if key in self.__dict__ else None
 107 
 108     def __iter__(self):
 109         return iter(self.__dict__)
 110 
 111 def dotate(x):
 112     'Recursively ensure all dictionaries in a value are dot-accessible.'
 113 
 114     if isinstance(x, dict):
 115         d = Dottable()
 116         d.__dict__ = {k: dotate(v) for k, v in x.items()}
 117         return d
 118     if isinstance(x, list):
 119         return [dotate(e) for e in x]
 120     if isinstance(x, tuple):
 121         return tuple(dotate(e) for e in x)
 122     return x
 123 
 124 dotated = dote = doted = dotified = dotify = dottified = dottify = dotate
 125 
 126 def chunk(items, chunk_size):
 127     'Break iterable into chunks, each with up to the item-count given.'
 128 
 129     if isinstance(items, str):
 130         n = len(items)
 131         while n >= chunk_size:
 132             yield items[:chunk_size]
 133             items = items[chunk_size:]
 134             n -= chunk_size
 135         if n > 0:
 136             yield items
 137         return
 138 
 139     if not isinstance(chunk_size, int):
 140         raise Exception('non-integer chunk-size')
 141     if chunk_size < 1:
 142         raise Exception('non-positive chunk-size')
 143 
 144     it = iter(items)
 145     while True:
 146         head = tuple(islice(it, chunk_size))
 147         if not head:
 148             return
 149         yield head
 150 
 151 chunked = chunk
 152 
 153 # re_cache is used by custom func compile to cache previously-compiled
 154 # regular-expressions, which makes them quicker to (re)use in formulas
 155 re_cache = {}
 156 
 157 def re_compile(expr, flags = 0):
 158     'Speed-up using regexes, by avoiding recompilations.'
 159 
 160     if flags in re_cache:
 161         cache = re_cache[flags]
 162     else:
 163         cache = {}
 164         re_cache[flags] = cache
 165     if expr in cache:
 166         return cache[expr]
 167 
 168     pat = compile_uncached(expr, flags)
 169     cache[expr] = pat
 170     return pat
 171 
 172 def icompile(expr):
 173     return re_compile(expr, IGNORECASE)
 174 
 175 def cond(*args):
 176     if len(args) == 0:
 177         return None
 178 
 179     for i, e in enumerate(args):
 180         if i % 2 == 0 and i < len(args) - 1 and e:
 181             return args[i + 1]
 182 
 183     return args[-1] if len(args) % 2 == 1 else None
 184 
 185 def dive(into, using):
 186     'Depth-first recursive caller for 1-input functions.'
 187 
 188     if callable(into):
 189         into, using = using, into
 190 
 191     def rec(v):
 192         if isinstance(v, dict):
 193             return {k: rec(v) for k, v in v.items()}
 194         if isinstance(v, Iterable) and not isinstance(v, str):
 195             return [rec(v) for v in v]
 196         return using(v)
 197 
 198     return rec(into)
 199 
 200 def divekeys(into, using):
 201     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 202 
 203     if callable(into):
 204         into, using = using, into
 205 
 206     def rec(v):
 207         if isinstance(v, dict):
 208             return {using(k): rec(v) for k, v in v.items()}
 209         if isinstance(v, Iterable) and not isinstance(v, str):
 210             return [rec(v) for i, v in enumerate(v)]
 211         return v
 212 
 213     return rec(None, into)
 214 
 215 def divekv(into, using, using2 = None):
 216     'Depth-first recursive caller for 2-input functions.'
 217 
 218     if using2 is None:
 219         if callable(into):
 220             into, using = using, into
 221     else:
 222         if not callable(using2):
 223             into, using, using2 = using2, into, using
 224 
 225     def rec(k, v):
 226         if isinstance(v, dict):
 227             return {k: rec(k, v) for k, v in v.items()}
 228         if isinstance(v, Iterable) and not isinstance(v, str):
 229             return [rec(i, v) for i, v in enumerate(v)]
 230         return using(k, v)
 231 
 232     def rec2(k, v):
 233         if isinstance(v, dict):
 234             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 235         if isinstance(v, Iterable) and not isinstance(v, str):
 236             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 237             return [rec2(i, v) for i, v in enumerate(v)]
 238         return using2(k, v)
 239 
 240     return rec(None, into) if using2 is None else rec2(None, into)
 241 
 242 kvdive = divekv
 243 
 244 def drop(src, *what):
 245     if isinstance(src, str):
 246         for s in what:
 247             src = src.replace(s, '')
 248         return src
 249 
 250     def kdrop(src, what):
 251         return {k: v for (k, v) in src.items() if not (k in what)}
 252 
 253     if isinstance(src, dict):
 254         return kdrop(src, set(what))
 255 
 256     if isinstance(src, Iterable):
 257         what = set(what)
 258         return [kdrop(e, what) for e in src if isinstance(e, dict)]
 259 
 260     return None
 261 
 262 dropped = drop
 263 
 264 def join(x, y = ' '):
 265     'Join values into a string, or make a dict from keys and values.'
 266 
 267     if isinstance(x, str):
 268         return x.join(str(v) for v in y)
 269     if isinstance(y, str):
 270         return y.join(str(v) for v in x)
 271     return {k: v for k, v in zip(x, y)}
 272 
 273 def pick(src, *keys):
 274     if isinstance(src, dict):
 275         return {k: src.get(k, None) for k in keys}
 276     return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]
 277 
 278 picked = pick
 279 
 280 def rescue(attempt, fallback = None):
 281     try:
 282         return attempt()
 283     except BrokenPipeError as e:
 284         raise e
 285     except Exception as e:
 286         if callable(fallback):
 287             return fallback(e)
 288         return fallback
 289 
 290 rescued = rescue
 291 
 292 def retype(x):
 293     'Try to narrow the type of the value given.'
 294 
 295     if isinstance(x, float):
 296         n = int(x)
 297         return n if float(n) == x else x
 298 
 299     if not isinstance(x, str):
 300         return x
 301 
 302     try:
 303         return loads(x)
 304     except Exception:
 305         pass
 306 
 307     try:
 308         return int(x)
 309     except Exception:
 310         pass
 311 
 312     try:
 313         return float(x)
 314     except Exception:
 315         pass
 316 
 317     return x
 318 
 319 autocast = autocasted = mold = molded = recast = recasted = remold = retype
 320 remolded = retyped = retype
 321 
 322 def typeof(x):
 323     # return str(type(x))
 324     return {
 325         type(None): 'null',
 326         bool: 'boolean',
 327         dict: 'object',
 328         float: 'number',
 329         int: 'number',
 330         str: 'string',
 331         list: 'array',
 332         tuple: 'array',
 333     }.get(type(x), 'other')
 334 
 335 jstype = typeof
 336 
 337 
 338 def result_needs_fixing(x):
 339     if isinstance(x, float):
 340         return not isnan(x)
 341     if x is None or isinstance(x, (bool, int, float, str)):
 342         return False
 343     rec = result_needs_fixing
 344     if isinstance(x, dict):
 345         return any(rec(k) or rec(v) for k, v in x.items())
 346     if isinstance(x, (list, tuple)):
 347         return any(rec(e) for e in x)
 348     return True
 349 
 350 def fix_result(x, default):
 351     if x is type:
 352         return type(default).__name__
 353 
 354     # if expression results in a func, auto-call it with the original data
 355     if callable(x):
 356         x = x(default)
 357 
 358     if isinstance(x, float) and isnan(x):
 359         return None
 360 
 361     if x is None or isinstance(x, (bool, int, float, str)):
 362         return x
 363 
 364     rec = fix_result
 365 
 366     if isinstance(x, dict):
 367         return {
 368             rec(k, default): rec(v, default) for k, v in x.items() if not
 369                 (isinstance(k, Skip) or isinstance(v, Skip))
 370         }
 371 
 372     if isinstance(x, Iterable):
 373         return tuple(rec(e, default) for e in x if not isinstance(e, Skip))
 374 
 375     if isinstance(x, Dottable):
 376         return rec(x.__dict__, default)
 377 
 378     if isinstance(x, Exception):
 379         raise x
 380 
 381     return None if isinstance(x, Skip) else str(x)
 382 
 383 def fail(msg, code = 1):
 384     print(str(msg), file=stderr)
 385     exit(code)
 386 
 387 def message(msg, result = None):
 388     print(msg, file=stderr)
 389     return result
 390 
 391 msg = message
 392 
 393 def seemsurl(path):
 394     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 395     return any(path.startswith(p) for p in protocols)
 396 
 397 def matchkey(kv, key):
 398     if key in kv:
 399         return key
 400 
 401     low = key.lower()
 402     for k in kv.keys():
 403         if low == k.lower():
 404             return k
 405 
 406     try:
 407         i = int(key)
 408         l = len(kv)
 409         if i < 0:
 410             i += l
 411 
 412         if not (-l <= i < l):
 413             return key
 414 
 415         for j, k in enumerate(kv.keys()):
 416             if i == j:
 417                 return k
 418     except Exception:
 419         return key
 420 
 421     return key
 422 
 423 def zoom(data, keys):
 424     for i, k in enumerate(keys):
 425         if isinstance(data, dict):
 426             # m = matchkey(data, k)
 427             # if not (m in data):
 428             #     raise Exception(f'{m}: object doesn\'t have that key')
 429             data = data.get(matchkey(data, k), None)
 430             continue
 431 
 432         if isinstance(data, (list, tuple)):
 433             if k == '+':
 434                 pick = keys[i + 1:]
 435                 return [{k: e.get(k, None) for k in pick}
 436                         for e in data if isinstance(e, dict)]
 437             if k == '-':
 438                 avoid = set(keys[i + 1:])
 439                 return [{k: v for (k, v) in e.items() if not (k in avoid)}
 440                         for e in data if isinstance(e, dict)]
 441             if k == '.':
 442                 rest = keys[i + 1:]
 443                 return [zoom(e, rest) for e in data]
 444 
 445             try:
 446                 k = int(k)
 447                 l = len(data)
 448                 data = data[k] if -l <= k < l else None
 449             except Exception:
 450                 # raise Exception(f'{k}: arrays don\'t have keys like objects')
 451                 data = None
 452             continue
 453 
 454         # return None
 455         # data = None
 456         raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')
 457 
 458     return data
 459 
def make_eval_once(run):
    '''
    Wrap an expression-running func into one which first sets the global
    name `eval` to None, then gives back the result of calling the original
    func with the expression given.
    '''

    # NOTE(review): when `run` is the built-in eval, expressions presumably
    # run with this inner func's scope as their local names, so renaming
    # either `expr` or `run` may change which names expressions can see:
    # confirm before changing any of this
    def eval_once(expr):
        global eval
        # from now on, looking up `eval` as a global name results in None
        eval = None
        return run(expr)
    return eval_once
 466 
 467 
 468 cr = '\r'
 469 crlf = '\r\n'
 470 dquo = dquote = '"'
 471 empty = ''
 472 lcurly = '{'
 473 lf = '\n'
 474 rcurly = '}'
 475 squo = squote = '\''
 476 
 477 nil = none = null = None
 478 
 479 
 480 no_input_opts = (
 481     '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
 482 )
 483 compact_output_opts = (
 484     '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
 485 )
 486 dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
 487 modules_opts = (
 488     '-m', '--m', '-mod', '--mod', '-module', '--module',
 489     '-modules', '--modules',
 490 )
 491 pipe_opts = ('-p', '--p', '-pipe', '--pipe')
 492 trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
 493 zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')
 494 
# args starts as all the command-line arguments after the script name: the
# loop below takes the leading options away from it, so what's left are the
# expression(s) and/or the input name
args = argv[1:]
# settings the leading options can change
no_input = False
zoom_stdin = False
pipe_mode = False
trace_errors = False
dottable_input = False
compact_output = False

# handle leading options, one at a time, until the first argument which
# isn't an option, or until an option which ends option-handling
while len(args) > 0:
    # a double-dash explicitly ends the options
    if args[0] == '--':
        args = args[1:]
        break

    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    # the pipe option also ends the options: all args after it are kept
    if args[0] in pipe_opts:
        pipe_mode = True
        args = args[1:]
        break

    # the modules option uses up the argument after it, which is a
    # comma-separated list of modules to import as global names
    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                # NOTE(review): this rebinds the global name `msg`, which
                # is also an alias for func message; the script quits right
                # after, via func fail
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # presumably cleared so that the expressions run later can't
            # reach these by name
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            fail(e, 1)

        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    # the zoom option also ends the options: all args after it are kept
    if args[0] in zoom_opts:
        zoom_stdin = True
        args = args[1:]
        break

    # the first argument which isn't an option ends the options
    break
 558 
 559 
 560 try:
 561     if zoom_stdin:
 562         data = load(stdin)
 563         data = zoom(data, args)
 564     else:
 565         expr = 'data'
 566         if len(args) > 0 and (not pipe_mode):
 567             expr = args[0]
 568             args = args[1:]
 569 
 570         if expr == '.':
 571             expr = 'data'
 572         if not pipe_mode:
 573             expr = compile(expr, expr, mode='eval')
 574 
 575         if (not pipe_mode) and len(args) > 1:
 576             raise Exception('can\'t use more than 1 input')
 577         path = '-' if len(args) == 0 or pipe_mode else args[0]
 578 
 579         if no_input:
 580             data = None
 581         elif path == '-':
 582             data = load(stdin)
 583         elif seemsurl(path):
 584             from io import TextIOWrapper
 585             from urllib.request import urlopen
 586             with urlopen(path) as inp:
 587                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 588                     data = load(txt)
 589         else:
 590             with open(path, encoding='utf-8') as inp:
 591                 data = load(inp)
 592 
 593         if dottable_input:
 594             data = dotate(data)
 595 
 596         v = val = value = d = dat = data
 597         exec = None
 598         open = None
 599         compile = None
 600 
 601         if pipe_mode:
 602             funcs = [eval(s) for s in args]
 603             eval = None
 604 
 605             # variable names `o` and `p` work like in the `pyp` tool, except
 606             # the pipeline steps were given as separate cmd-line arguments
 607             global o, p
 608 
 609             o = p = prev = v
 610             for f in funcs:
 611                 p = f(p)
 612                 if callable(p):
 613                     p = p(prev)
 614                 prev = p
 615             v = p
 616         else:
 617             eval = make_eval_once(eval)
 618             v = eval(expr)
 619 
 620     if result_needs_fixing(v):
 621         v = fix_result(v, value)
 622 
 623     if compact_output:
 624         dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
 625     else:
 626         dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
 627     print()
 628 except BrokenPipeError:
 629     # quit quietly, instead of showing a confusing error message
 630     stderr.close()
 631     exit(0)
 632 except KeyboardInterrupt:
 633     exit(2)
 634 except Exception as e:
 635     if trace_errors:
 636         raise e
 637     else:
 638         fail(e, 1)