File: tjp.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
# Help/usage text: the argument checks which follow the imports print it,
# stripped of its leading/trailing whitespace, either when no arguments are
# given or when a help option is the only argument.
info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'
'''
  72 
  73 
  74 from itertools import islice
  75 from json import dump, load, loads
  76 from math import isnan
  77 from re import compile as compile_uncached, IGNORECASE
  78 from sys import argv, exit, stderr, stdin, stdout
  79 from typing import Iterable
  80 
  81 
# with no arguments at all there's nothing to run: show the help text on
# stderr, then quit
if len(argv) < 2:
    print(info.strip(), file=stderr)
    exit(0)
# a help option given as the only argument shows the same text on stdout
if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info.strip())
    exit(0)
  88 
  89 
  90 class Skip:
  91     pass
  92 
  93 skip = Skip()
  94 
  95 class Dottable:
  96     'Enable convenient dot-syntax access to dictionary values.'
  97 
  98     def __getattr__(self, key):
  99         return self.__dict__[key] if key in self.__dict__ else None
 100 
 101     def __getitem__(self, key):
 102         return self.__dict__[key] if key in self.__dict__ else None
 103 
 104     def __iter__(self):
 105         return iter(self.__dict__)
 106 
 107 def dotate(x):
 108     'Recursively ensure all dictionaries in a value are dot-accessible.'
 109 
 110     if isinstance(x, dict):
 111         d = Dottable()
 112         d.__dict__ = {k: dotate(v) for k, v in x.items()}
 113         return d
 114     if isinstance(x, list):
 115         return [dotate(e) for e in x]
 116     if isinstance(x, tuple):
 117         return tuple(dotate(e) for e in x)
 118     return x
 119 
 120 dotated = dotate
 121 dote = dotate
 122 doted = dotate
 123 dotified = dotate
 124 dotify = dotate
 125 dottified = dotate
 126 dottify = dotate
 127 
 128 def chunk(items, chunk_size):
 129     'Break iterable into chunks, each with up to the item-count given.'
 130 
 131     if isinstance(items, str):
 132         n = len(items)
 133         while n >= chunk_size:
 134             yield items[:chunk_size]
 135             items = items[chunk_size:]
 136             n -= chunk_size
 137         if n > 0:
 138             yield items
 139         return
 140 
 141     if not isinstance(chunk_size, int):
 142         raise Exception('non-integer chunk-size')
 143     if chunk_size < 1:
 144         raise Exception('non-positive chunk-size')
 145 
 146     it = iter(items)
 147     while True:
 148         head = tuple(islice(it, chunk_size))
 149         if not head:
 150             return
 151         yield head
 152 
 153 chunked = chunk
 154 
 155 # re_cache is used by custom func compile to cache previously-compiled
 156 # regular-expressions, which makes them quicker to (re)use in formulas
 157 re_cache = {}
 158 
 159 def re_compile(expr, flags = 0):
 160     'Speed-up using regexes, by avoiding recompilations.'
 161 
 162     if flags in re_cache:
 163         cache = re_cache[flags]
 164     else:
 165         cache = {}
 166         re_cache[flags] = cache
 167     if expr in cache:
 168         return cache[expr]
 169 
 170     pat = compile_uncached(expr, flags)
 171     cache[expr] = pat
 172     return pat
 173 
 174 def icompile(expr):
 175     return re_compile(expr, IGNORECASE)
 176 
 177 def cond(*args):
 178     if len(args) == 0:
 179         return None
 180 
 181     for i, e in enumerate(args):
 182         if i % 2 == 0 and i < len(args) - 1 and e:
 183             return args[i + 1]
 184 
 185     return args[-1] if len(args) % 2 == 1 else None
 186 
 187 def dive(into, using):
 188     'Depth-first recursive caller for 1-input functions.'
 189 
 190     if callable(into):
 191         into, using = using, into
 192 
 193     def rec(v):
 194         if isinstance(v, dict):
 195             return {k: rec(v) for k, v in v.items()}
 196         if isinstance(v, Iterable) and not isinstance(v, str):
 197             return [rec(v) for v in v]
 198         return using(v)
 199 
 200     return rec(into)
 201 
 202 def divekeys(into, using):
 203     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 204 
 205     if callable(into):
 206         into, using = using, into
 207 
 208     def rec(v):
 209         if isinstance(v, dict):
 210             return {using(k): rec(v) for k, v in v.items()}
 211         if isinstance(v, Iterable) and not isinstance(v, str):
 212             return [rec(v) for i, v in enumerate(v)]
 213         return v
 214 
 215     return rec(None, into)
 216 
 217 def divekv(into, using, using2 = None):
 218     'Depth-first recursive caller for 2-input functions.'
 219 
 220     if using2 is None:
 221         if callable(into):
 222             into, using = using, into
 223     else:
 224         if not callable(using2):
 225             into, using, using2 = using2, into, using
 226 
 227     def rec(k, v):
 228         if isinstance(v, dict):
 229             return {k: rec(k, v) for k, v in v.items()}
 230         if isinstance(v, Iterable) and not isinstance(v, str):
 231             return [rec(i, v) for i, v in enumerate(v)]
 232         return using(k, v)
 233 
 234     def rec2(k, v):
 235         if isinstance(v, dict):
 236             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 237         if isinstance(v, Iterable) and not isinstance(v, str):
 238             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 239             return [rec2(i, v) for i, v in enumerate(v)]
 240         return using2(k, v)
 241 
 242     return rec(None, into) if using2 is None else rec2(None, into)
 243 
 244 kvdive = divekv
 245 
 246 def drop(src, *what):
 247     if isinstance(src, str):
 248         for s in what:
 249             src = src.replace(s, '')
 250         return src
 251 
 252     def kdrop(src, what):
 253         return {k: v for (k, v) in src.items() if not (k in what)}
 254 
 255     if isinstance(src, dict):
 256         return kdrop(src, set(what))
 257 
 258     if isinstance(src, Iterable):
 259         what = set(what)
 260         return [kdrop(e, what) for e in src if isinstance(e, dict)]
 261 
 262     return None
 263 
 264 dropped = drop
 265 
 266 def join(x, y = ' '):
 267     'Join values into a string, or make a dict from keys and values.'
 268 
 269     if isinstance(x, str):
 270         return x.join(str(v) for v in y)
 271     if isinstance(y, str):
 272         return y.join(str(v) for v in x)
 273     return {k: v for k, v in zip(x, y)}
 274 
 275 def pick(src, *keys):
 276     if isinstance(src, dict):
 277         return {k: src.get(k, None) for k in keys}
 278     return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]
 279 
 280 picked = pick
 281 
 282 def rescue(attempt, fallback = None):
 283     try:
 284         return attempt()
 285     except Exception as e:
 286         if callable(fallback):
 287             return fallback(e)
 288         return fallback
 289 
 290 catch = rescue
 291 recover = rescue
 292 rescued = rescue
 293 
 294 def retype(x):
 295     'Try to narrow the type of the value given.'
 296 
 297     if isinstance(x, float):
 298         n = int(x)
 299         return n if float(n) == x else x
 300 
 301     if not isinstance(x, str):
 302         return x
 303 
 304     try:
 305         return loads(x)
 306     except Exception:
 307         pass
 308 
 309     try:
 310         return int(x)
 311     except Exception:
 312         pass
 313 
 314     try:
 315         return float(x)
 316     except Exception:
 317         pass
 318 
 319     return x
 320 
 321 autocast = retype
 322 autocasted = retype
 323 mold = retype
 324 molded = retype
 325 recast = retype
 326 recasted = retype
 327 remold = retype
 328 remolded = retype
 329 retyped = retype
 330 
 331 def typeof(x):
 332     # return str(type(x))
 333     return {
 334         type(None): 'null',
 335         bool: 'boolean',
 336         dict: 'object',
 337         float: 'number',
 338         int: 'number',
 339         str: 'string',
 340         list: 'array',
 341         tuple: 'array',
 342     }.get(type(x), 'other')
 343 
 344 jstype = typeof
 345 
 346 
 347 def result_needs_fixing(x):
 348     if isinstance(x, float):
 349         return not isnan(x)
 350     if x is None or isinstance(x, (bool, int, float, str)):
 351         return False
 352     rec = result_needs_fixing
 353     if isinstance(x, dict):
 354         return any(rec(k) or rec(v) for k, v in x.items())
 355     if isinstance(x, (list, tuple)):
 356         return any(rec(e) for e in x)
 357     return True
 358 
 359 def fix_result(x, default):
 360     if x is type:
 361         return type(default).__name__
 362 
 363     # if expression results in a func, auto-call it with the original data
 364     if callable(x):
 365         x = x(default)
 366 
 367     if isinstance(x, float) and isnan(x):
 368         return None
 369 
 370     if x is None or isinstance(x, (bool, int, float, str)):
 371         return x
 372 
 373     rec = fix_result
 374 
 375     if isinstance(x, dict):
 376         return {
 377             rec(k, default): rec(v, default) for k, v in x.items() if not
 378                 (isinstance(k, Skip) or isinstance(v, Skip))
 379         }
 380 
 381     if isinstance(x, Iterable):
 382         return tuple(rec(e, default) for e in x if not isinstance(e, Skip))
 383 
 384     if isinstance(x, Dottable):
 385         return rec(x.__dict__, default)
 386 
 387     if isinstance(x, Exception):
 388         raise x
 389 
 390     return None if isinstance(x, Skip) else str(x)
 391 
 392 def fail(msg, code = 1):
 393     print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
 394     exit(code)
 395 
 396 def message(msg, result = None):
 397     print(msg, file=stderr)
 398     return result
 399 
 400 msg = message
 401 
 402 def seemsurl(path):
 403     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 404     return any(path.startswith(p) for p in protocols)
 405 
 406 def matchkey(kv, key):
 407     if key in kv:
 408         return key
 409 
 410     low = key.lower()
 411     for k in kv.keys():
 412         if low == k.lower():
 413             return k
 414 
 415     try:
 416         i = int(key)
 417         l = len(kv)
 418         if i < 0:
 419             i += l
 420 
 421         if not (-l <= i < l):
 422             return key
 423 
 424         for j, k in enumerate(kv.keys()):
 425             if i == j:
 426                 return k
 427     except Exception:
 428         return key
 429 
 430     return key
 431 
 432 def zoom(data, keys):
 433     for i, k in enumerate(keys):
 434         if isinstance(data, dict):
 435             # m = matchkey(data, k)
 436             # if not (m in data):
 437             #     raise Exception(f'{m}: object doesn\'t have that key')
 438             data = data.get(matchkey(data, k), None)
 439             continue
 440 
 441         if isinstance(data, (list, tuple)):
 442             if k == '+':
 443                 pick = keys[i + 1:]
 444                 return [{k: e.get(k, None) for k in pick}
 445                         for e in data if isinstance(e, dict)]
 446             if k == '-':
 447                 avoid = set(keys[i + 1:])
 448                 return [{k: v for (k, v) in e.items() if not (k in avoid)}
 449                         for e in data if isinstance(e, dict)]
 450             if k == '.':
 451                 rest = keys[i + 1:]
 452                 return [zoom(e, rest) for e in data]
 453 
 454             try:
 455                 k = int(k)
 456                 l = len(data)
 457                 data = data[k] if -l <= k < l else None
 458             except Exception:
 459                 # raise Exception(f'{k}: arrays don\'t have keys like objects')
 460                 data = None
 461             continue
 462 
 463         # return None
 464         # data = None
 465         raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')
 466 
 467     return data
 468 
def make_eval_once(run):
    'Wrap a 1-input func, so calling the result also unsets global `eval`.'

    def eval_once(expr):
        # rebind the module-level name `eval` to None right before running
        # the func given, so that name is no longer usable from then on
        global eval
        eval = None
        # NOTE(review): if `run` is the builtin eval, the expression is
        # presumably evaluated with this frame's locals, which would make
        # names `run` and `expr` visible to it; confirm if that's intended
        return run(expr)
    return eval_once
 475 
 476 
# module-level names for common strings: presumably these are conveniences
# for the expressions given, since some of these values are awkward to quote
# on a command line; verify before renaming/removing any of them
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

# alternative names for None
nil = None
none = None
null = None
 493 
 494 
# all the names for each of the leading options handled by the
# option-handling loop which follows

# don't read any input: the input data is None
no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
# emit the result as JSON with no indentation and no extra spaces
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
# make the input data dot-accessible, via func dotate
# NOTE(review): unlike the other groups, this one has no single-dash
# single-letter name (`-d`); confirm whether that's deliberate
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
# import the module(s) named in the next argument, as a comma-separated list
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
# handle all remaining arguments as expressions for pipeline steps
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
# re-raise exceptions, instead of just showing their message
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
# handle all remaining arguments as keys to zoom into the standard input
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')
 509 
# settings controlled by the leading options
args = argv[1:]
no_input = False
zoom_stdin = False
pipe_mode = False
trace_errors = False
dottable_input = False
compact_output = False

# handle all leading options, dropping them from `args` along the way: what's
# left after this loop are the expression(s)/key(s) and the input name, if any
while len(args) > 0:
    # a double-dash explicitly ends the options
    if args[0] == '--':
        args = args[1:]
        break

    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    # the pipe option ends the options: all arguments after it are
    # pipeline steps
    if args[0] in pipe_opts:
        pipe_mode = True
        args = args[1:]
        break

    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                # this rebinds the global name `msg`, which until now was an
                # alias for func message; the script quits right after this,
                # via func fail in the except clause below
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # each module becomes a global variable, named exactly as given
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # unset the helper names used just above
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            fail(e, 1)

        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    # the zoom option ends the options: all arguments after it are keys
    if args[0] in zoom_opts:
        zoom_stdin = True
        args = args[1:]
        break

    # the first argument which isn't an option ends the options
    break
 573 
 574 
 575 try:
 576     if zoom_stdin:
 577         data = load(stdin)
 578         data = zoom(data, args)
 579     else:
 580         expr = 'data'
 581         if len(args) > 0 and (not pipe_mode):
 582             expr = args[0]
 583             args = args[1:]
 584 
 585         if expr == '.':
 586             expr = 'data'
 587         if not pipe_mode:
 588             expr = compile(expr, expr, mode='eval')
 589 
 590         if (not pipe_mode) and len(args) > 1:
 591             raise Exception('can\'t use more than 1 input')
 592         path = '-' if len(args) == 0 or pipe_mode else args[0]
 593 
 594         if no_input:
 595             data = None
 596         elif path == '-':
 597             data = load(stdin)
 598         elif seemsurl(path):
 599             from io import TextIOWrapper
 600             from urllib.request import urlopen
 601             with urlopen(path) as inp:
 602                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 603                     data = load(txt)
 604         else:
 605             with open(path, encoding='utf-8') as inp:
 606                 data = load(inp)
 607 
 608         if dottable_input:
 609             data = dotate(data)
 610 
 611         v = val = value = d = dat = data
 612         exec = None
 613         open = None
 614         compile = None
 615 
 616         if pipe_mode:
 617             funcs = [eval(s) for s in args]
 618             eval = None
 619 
 620             # variable names `o` and `p` work like in the `pyp` tool, except
 621             # the pipeline steps were given as separate cmd-line arguments
 622             global o, p
 623 
 624             o = p = prev = v
 625             for f in funcs:
 626                 p = f(p)
 627                 if callable(p):
 628                     p = p(prev)
 629                 prev = p
 630             v = p
 631         else:
 632             eval = make_eval_once(eval)
 633             v = eval(expr)
 634 
 635     if result_needs_fixing(v):
 636         v = fix_result(v, value)
 637 
 638     if compact_output:
 639         dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
 640     else:
 641         dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
 642     print()
 643 except BrokenPipeError:
 644     # quit quietly, instead of showing a confusing error message
 645     stderr.close()
 646     exit(0)
 647 except KeyboardInterrupt:
 648     exit(2)
 649 except Exception as e:
 650     if trace_errors:
 651         raise e
 652     else:
 653         fail(e, 1)