File: tjp.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
# info is the help message: it's shown on stderr when the script is run with
# no arguments, and on stdout when the first argument is a help option
info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'

# try to auto-parse values (esp. numbers) from a table of string values
echo '[{"key": "abc", "val": "123"}, {"key": "xyz", "val": "no"}]' | \\
    tjp '[{k: rescue(lambda: loads(v), v) for k, v in e.items()} for e in v]'
'''
  76 
  77 
  78 from itertools import islice
  79 from json import dump, load, loads
  80 from math import isnan
  81 from re import compile as compile_uncached, IGNORECASE
  82 from sys import argv, exit, stderr, stdin, stdout
  83 from typing import Iterable
  84 
  85 
  86 if len(argv) < 2:
  87     print(info.strip(), file=stderr)
  88     exit(0)
  89 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  90     print(info.strip())
  91     exit(0)
  92 
  93 
class Skip:
    'Marker type: func fix_result leaves instances of it out of the output.'
    pass

# skip is a ready-made marker value; being a module-level name, it's visible
# to the expressions this script evaluates
skip = Skip()
  98 
  99 class Dottable:
 100     'Enable convenient dot-syntax access to dictionary values.'
 101 
 102     def __getattr__(self, key):
 103         return self.__dict__[key] if key in self.__dict__ else None
 104 
 105     def __getitem__(self, key):
 106         return self.__dict__[key] if key in self.__dict__ else None
 107 
 108     def __iter__(self):
 109         return iter(self.__dict__)
 110 
 111 def dotate(x):
 112     'Recursively ensure all dictionaries in a value are dot-accessible.'
 113 
 114     if isinstance(x, dict):
 115         d = Dottable()
 116         d.__dict__ = {k: dotate(v) for k, v in x.items()}
 117         return d
 118     if isinstance(x, list):
 119         return [dotate(e) for e in x]
 120     if isinstance(x, tuple):
 121         return tuple(dotate(e) for e in x)
 122     return x
 123 
 124 dotated = dote = doted = dotified = dotify = dottified = dottify = dotate
 125 
 126 def chunk(items, chunk_size):
 127     'Break iterable into chunks, each with up to the item-count given.'
 128 
 129     if isinstance(items, str):
 130         n = len(items)
 131         while n >= chunk_size:
 132             yield items[:chunk_size]
 133             items = items[chunk_size:]
 134             n -= chunk_size
 135         if n > 0:
 136             yield items
 137         return
 138 
 139     if not isinstance(chunk_size, int):
 140         raise Exception('non-integer chunk-size')
 141     if chunk_size < 1:
 142         raise Exception('non-positive chunk-size')
 143 
 144     it = iter(items)
 145     while True:
 146         head = tuple(islice(it, chunk_size))
 147         if not head:
 148             return
 149         yield head
 150 
 151 chunked = chunk
 152 
 153 # re_cache is used by custom func compile to cache previously-compiled
 154 # regular-expressions, which makes them quicker to (re)use in formulas
 155 re_cache = {}
 156 
 157 def re_compile(expr, flags = 0):
 158     'Speed-up using regexes, by avoiding recompilations.'
 159 
 160     if flags in re_cache:
 161         cache = re_cache[flags]
 162     else:
 163         cache = {}
 164         re_cache[flags] = cache
 165     if expr in cache:
 166         return cache[expr]
 167 
 168     pat = compile_uncached(expr, flags)
 169     cache[expr] = pat
 170     return pat
 171 
 172 def icompile(expr):
 173     return re_compile(expr, IGNORECASE)
 174 
 175 def cond(*args):
 176     if len(args) == 0:
 177         return None
 178 
 179     for i, e in enumerate(args):
 180         if i % 2 == 0 and i < len(args) - 1 and e:
 181             return args[i + 1]
 182 
 183     return args[-1] if len(args) % 2 == 1 else None
 184 
 185 def dive(into, using):
 186     'Depth-first recursive caller for 1-input functions.'
 187 
 188     if callable(into):
 189         into, using = using, into
 190 
 191     def rec(v):
 192         if isinstance(v, dict):
 193             return {k: rec(v) for k, v in v.items()}
 194         if isinstance(v, Iterable) and not isinstance(v, str):
 195             return [rec(v) for v in v]
 196         return using(v)
 197 
 198     return rec(into)
 199 
 200 def divekeys(into, using):
 201     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 202 
 203     if callable(into):
 204         into, using = using, into
 205 
 206     def rec(v):
 207         if isinstance(v, dict):
 208             return {using(k): rec(v) for k, v in v.items()}
 209         if isinstance(v, Iterable) and not isinstance(v, str):
 210             return [rec(v) for i, v in enumerate(v)]
 211         return v
 212 
 213     return rec(None, into)
 214 
 215 def divekv(into, using, using2 = None):
 216     'Depth-first recursive caller for 2-input functions.'
 217 
 218     if using2 is None:
 219         if callable(into):
 220             into, using = using, into
 221     else:
 222         if not callable(using2):
 223             into, using, using2 = using2, into, using
 224 
 225     def rec(k, v):
 226         if isinstance(v, dict):
 227             return {k: rec(k, v) for k, v in v.items()}
 228         if isinstance(v, Iterable) and not isinstance(v, str):
 229             return [rec(i, v) for i, v in enumerate(v)]
 230         return using(k, v)
 231 
 232     def rec2(k, v):
 233         if isinstance(v, dict):
 234             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 235         if isinstance(v, Iterable) and not isinstance(v, str):
 236             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 237             return [rec2(i, v) for i, v in enumerate(v)]
 238         return using2(k, v)
 239 
 240     return rec(None, into) if using2 is None else rec2(None, into)
 241 
 242 kvdive = divekv
 243 
 244 def drop(src, *what):
 245     if isinstance(src, str):
 246         for s in what:
 247             src = src.replace(s, '')
 248         return src
 249 
 250     def kdrop(src, what):
 251         return {k: v for (k, v) in src.items() if not (k in what)}
 252 
 253     if isinstance(src, dict):
 254         return kdrop(src, set(what))
 255 
 256     if isinstance(src, Iterable):
 257         what = set(what)
 258         return [kdrop(e, what) for e in src if isinstance(e, dict)]
 259 
 260     return None
 261 
 262 dropped = drop
 263 
 264 def join(x, y = ' '):
 265     'Join values into a string, or make a dict from keys and values.'
 266 
 267     if isinstance(x, str):
 268         return x.join(str(v) for v in y)
 269     if isinstance(y, str):
 270         return y.join(str(v) for v in x)
 271     return {k: v for k, v in zip(x, y)}
 272 
 273 def pick(src, *keys):
 274     if isinstance(src, dict):
 275         return {k: src.get(k, None) for k in keys}
 276     return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]
 277 
 278 picked = pick
 279 
 280 def rescue(attempt, fallback = None):
 281     try:
 282         return attempt()
 283     except Exception as e:
 284         if callable(fallback):
 285             return fallback(e)
 286         return fallback
 287 
 288 rescued = rescue
 289 
 290 def retype(x):
 291     'Try to narrow the type of the value given.'
 292 
 293     if isinstance(x, float):
 294         n = int(x)
 295         return n if float(n) == x else x
 296 
 297     if not isinstance(x, str):
 298         return x
 299 
 300     try:
 301         return loads(x)
 302     except Exception:
 303         pass
 304 
 305     try:
 306         return int(x)
 307     except Exception:
 308         pass
 309 
 310     try:
 311         return float(x)
 312     except Exception:
 313         pass
 314 
 315     return x
 316 
 317 autocast = autocasted = mold = molded = recast = recasted = remold = retype
 318 remolded = retyped = retype
 319 
 320 def typeof(x):
 321     # return str(type(x))
 322     return {
 323         type(None): 'null',
 324         bool: 'boolean',
 325         dict: 'object',
 326         float: 'number',
 327         int: 'number',
 328         str: 'string',
 329         list: 'array',
 330         tuple: 'array',
 331     }.get(type(x), 'other')
 332 
 333 jstype = typeof
 334 
 335 
 336 def result_needs_fixing(x):
 337     if isinstance(x, float):
 338         return not isnan(x)
 339     if x is None or isinstance(x, (bool, int, float, str)):
 340         return False
 341     rec = result_needs_fixing
 342     if isinstance(x, dict):
 343         return any(rec(k) or rec(v) for k, v in x.items())
 344     if isinstance(x, (list, tuple)):
 345         return any(rec(e) for e in x)
 346     return True
 347 
 348 def fix_result(x, default):
 349     if x is type:
 350         return type(default).__name__
 351 
 352     # if expression results in a func, auto-call it with the original data
 353     if callable(x):
 354         x = x(default)
 355 
 356     if isinstance(x, float) and isnan(x):
 357         return None
 358 
 359     if x is None or isinstance(x, (bool, int, float, str)):
 360         return x
 361 
 362     rec = fix_result
 363 
 364     if isinstance(x, dict):
 365         return {
 366             rec(k, default): rec(v, default) for k, v in x.items() if not
 367                 (isinstance(k, Skip) or isinstance(v, Skip))
 368         }
 369 
 370     if isinstance(x, Iterable):
 371         return tuple(rec(e, default) for e in x if not isinstance(e, Skip))
 372 
 373     if isinstance(x, Dottable):
 374         return rec(x.__dict__, default)
 375 
 376     if isinstance(x, Exception):
 377         raise x
 378 
 379     return None if isinstance(x, Skip) else str(x)
 380 
 381 def fail(msg, code = 1):
 382     print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
 383     exit(code)
 384 
 385 def message(msg, result = None):
 386     print(msg, file=stderr)
 387     return result
 388 
 389 msg = message
 390 
 391 def seemsurl(path):
 392     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 393     return any(path.startswith(p) for p in protocols)
 394 
 395 def matchkey(kv, key):
 396     if key in kv:
 397         return key
 398 
 399     low = key.lower()
 400     for k in kv.keys():
 401         if low == k.lower():
 402             return k
 403 
 404     try:
 405         i = int(key)
 406         l = len(kv)
 407         if i < 0:
 408             i += l
 409 
 410         if not (-l <= i < l):
 411             return key
 412 
 413         for j, k in enumerate(kv.keys()):
 414             if i == j:
 415                 return k
 416     except Exception:
 417         return key
 418 
 419     return key
 420 
 421 def zoom(data, keys):
 422     for i, k in enumerate(keys):
 423         if isinstance(data, dict):
 424             # m = matchkey(data, k)
 425             # if not (m in data):
 426             #     raise Exception(f'{m}: object doesn\'t have that key')
 427             data = data.get(matchkey(data, k), None)
 428             continue
 429 
 430         if isinstance(data, (list, tuple)):
 431             if k == '+':
 432                 pick = keys[i + 1:]
 433                 return [{k: e.get(k, None) for k in pick}
 434                         for e in data if isinstance(e, dict)]
 435             if k == '-':
 436                 avoid = set(keys[i + 1:])
 437                 return [{k: v for (k, v) in e.items() if not (k in avoid)}
 438                         for e in data if isinstance(e, dict)]
 439             if k == '.':
 440                 rest = keys[i + 1:]
 441                 return [zoom(e, rest) for e in data]
 442 
 443             try:
 444                 k = int(k)
 445                 l = len(data)
 446                 data = data[k] if -l <= k < l else None
 447             except Exception:
 448                 # raise Exception(f'{k}: arrays don\'t have keys like objects')
 449                 data = None
 450             continue
 451 
 452         # return None
 453         # data = None
 454         raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')
 455 
 456     return data
 457 
def make_eval_once(run):
    '''
    Wrap an evaluator func so the global name `eval` is disabled on use.

    The func returned rebinds the module-level name `eval` to None, then
    calls the func given with the expression, returning its result.
    '''

    def eval_once(expr):
        # rebind the module-level name before running the expression, so
        # looking up `eval` among this module's globals gives None by then
        global eval
        eval = None
        return run(expr)
    return eval_once
 464 
 465 
# short names for common strings: being module-level names, they're visible
# to the expressions this script evaluates; presumably these are meant to
# spare such expressions from awkward shell-quoting
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''

# aliases for None, named after the null values from other languages
nil = None
none = None
null = None
 481 
 482 
# all the spellings accepted for each leading command-line option

# don't read any input: the data is None
no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
# emit the result as a single line of JSON, with no extra spaces
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
# make the input dot-accessible, using func dotate
# NOTE(review): unlike the other groups, there's no single-dash `-d` alias
# here; confirm whether that's deliberate
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
# import the modules named in the argument right after the option
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
# treat all remaining arguments as a pipeline of expressions
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
# re-raise exceptions, instead of just showing their messages
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
# treat all remaining arguments as keys to zoom into data read from stdin
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')

# args starts as all the command-line arguments after the script name, and
# is shortened as leading options are handled; the flags below are the
# settings those options can change
args = argv[1:]
no_input = False
zoom_stdin = False
pipe_mode = False
trace_errors = False
dottable_input = False
compact_output = False
 505 
# handle all leading options, removing them from args as they're recognized;
# the loop ends at the first argument which isn't an option
while len(args) > 0:
    # a double-dash explicitly ends the options
    if args[0] == '--':
        args = args[1:]
        break

    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    # the pipe option is the last option handled: all arguments after it
    # are left in args for pipe-mode to use
    if args[0] in pipe_opts:
        pipe_mode = True
        args = args[1:]
        break

    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # each module imported becomes a module-level name, which makes
            # it visible to the expressions evaluated later
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # clear the helper names once done with them
            g = None
            import_module = None
            # skip both the option and its module-list argument
            args = args[2:]
        except Exception as e:
            fail(e, 1)

        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    # the zoom option is the last option handled: all arguments after it
    # are left in args as the keys to zoom with
    if args[0] in zoom_opts:
        zoom_stdin = True
        args = args[1:]
        break

    break
 561 
 562 
 563 try:
 564     if zoom_stdin:
 565         data = load(stdin)
 566         data = zoom(data, args)
 567     else:
 568         expr = 'data'
 569         if len(args) > 0 and (not pipe_mode):
 570             expr = args[0]
 571             args = args[1:]
 572 
 573         if expr == '.':
 574             expr = 'data'
 575         if not pipe_mode:
 576             expr = compile(expr, expr, mode='eval')
 577 
 578         if (not pipe_mode) and len(args) > 1:
 579             raise Exception('can\'t use more than 1 input')
 580         path = '-' if len(args) == 0 or pipe_mode else args[0]
 581 
 582         if no_input:
 583             data = None
 584         elif path == '-':
 585             data = load(stdin)
 586         elif seemsurl(path):
 587             from io import TextIOWrapper
 588             from urllib.request import urlopen
 589             with urlopen(path) as inp:
 590                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 591                     data = load(txt)
 592         else:
 593             with open(path, encoding='utf-8') as inp:
 594                 data = load(inp)
 595 
 596         if dottable_input:
 597             data = dotate(data)
 598 
 599         v = val = value = d = dat = data
 600         exec = None
 601         open = None
 602         compile = None
 603 
 604         if pipe_mode:
 605             funcs = [eval(s) for s in args]
 606             eval = None
 607 
 608             # variable names `o` and `p` work like in the `pyp` tool, except
 609             # the pipeline steps were given as separate cmd-line arguments
 610             global o, p
 611 
 612             o = p = prev = v
 613             for f in funcs:
 614                 p = f(p)
 615                 if callable(p):
 616                     p = p(prev)
 617                 prev = p
 618             v = p
 619         else:
 620             eval = make_eval_once(eval)
 621             v = eval(expr)
 622 
 623     if result_needs_fixing(v):
 624         v = fix_result(v, value)
 625 
 626     if compact_output:
 627         dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
 628     else:
 629         dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
 630     print()
 631 except BrokenPipeError:
 632     # quit quietly, instead of showing a confusing error message
 633     stderr.close()
 634     exit(0)
 635 except KeyboardInterrupt:
 636     exit(2)
 637 except Exception as e:
 638     if trace_errors:
 639         raise e
 640     else:
 641         fail(e, 1)