#!/usr/bin/python

# The MIT License (MIT)
#
# Copyright (c) 2026 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'

# try to auto-parse values (esp. numbers) from a table of string values
echo '[{"key": "abc", "val": "123"}, {"key": "xyz", "val": "no"}]' | \\
tjp '[{k: rescue(lambda: loads(v), v) for k, v in e.items()} for e in v]'
'''


from itertools import islice
from json import dump, load, loads
from math import isnan
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin, stdout
from typing import Iterable


class Skip:
    'Marker type: fix_result leaves values of this type out of the output.'
    pass


# ready-made marker value, meant to be used from expressions
skip = Skip()


class Dottable:
    'Enable convenient dot-syntax access to dictionary values.'

    def __getattr__(self, key):
        # only called when regular attribute lookup fails, so any name
        # missing from the instance dictionary gives None, not an error
        return self.__dict__.get(key)

    def __getitem__(self, key):
        # square-bracket access behaves the same way as dot-access
        return self.__dict__.get(key)

    def __iter__(self):
        # iterate over the keys, the same way dictionaries do
        return iter(self.__dict__)


def dotate(x):
    'Recursively ensure all dictionaries in a value are dot-accessible.'

    if isinstance(x, dict):
        d = Dottable()
        d.__dict__ = {k: dotate(v) for k, v in x.items()}
        return d
    if isinstance(x, list):
        return [dotate(e) for e in x]
    if isinstance(x, tuple):
        return tuple(dotate(e) for e in x)
    return x


dotated = dote = doted = dotified = dotify = dottified = dottify = dotate


def chunk(items, chunk_size):
    '''
    Break iterable into chunks, each with up to the item-count given.

    Strings are split into substrings, while any other iterable is split
    into tuples. This is a generator, so a bad chunk-size is only reported
    when the first chunk is requested.
    '''

    # validate before either branch: a chunk-size below 1 would otherwise
    # make the string branch loop forever
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head


chunked = chunk


# re_cache is used by custom func re_compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas;
# it maps each flags value to its own dictionary of expression -> pattern
re_cache = {}


def re_compile(expr, flags = 0):
    'Speed-up using regexes, by avoiding recompilations.'

    cache = re_cache.setdefault(flags, {})
    pat = cache.get(expr)
    if pat is None:
        pat = compile_uncached(expr, flags)
        cache[expr] = pat
    return pat


def icompile(expr):
    'Like re_compile, but the resulting regex ignores letter-case.'
    return re_compile(expr, IGNORECASE)


def cond(*args):
    '''
    Pick the value following the first truthy test among test/value pairs.

    An odd trailing argument is the fallback result when no test succeeds;
    without one, the fallback is None.
    '''

    for test, value in zip(args[::2], args[1::2]):
        if test:
            return value
    return args[-1] if len(args) % 2 == 1 else None


def dive(into, using):
    '''
    Depth-first recursive caller for 1-input functions.

    The data and the function can be given in either order. Dictionaries
    keep their keys, other non-string iterables become lists, and any other
    value is replaced by the result of calling the function with it.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)


def divekeys(into, using):
    '''
    Depth-first recursive caller for 1-input funcs which rename dict keys.

    The data and the function can be given in either order. The function is
    called with each dictionary key, at any depth; values which aren't
    dictionaries or non-string iterables are kept as they are.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    # the recursive helper takes only the value: calling it with 2 args
    # used to fail with a TypeError every single time
    return rec(into)


def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions.

    With 2 arguments, these are the data and a func(key, value) giving the
    new leaf values, in either order. With 3 arguments, these are the data,
    a func(key, value) whose stringified result renames dictionary keys, and
    a func(key, value) giving the new leaf values: the data can come either
    first or last. Items from non-string iterables use their index as key.
    '''

    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            # the data came last, after both functions
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {key: rec(key, e) for key, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(i, e) for i, e in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(key, e)): rec2(key, e) for key, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(i, e) for i, e in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)


kvdive = divekv


def drop(src, *what):
    '''
    Remove substrings from a string, or keys from dictionaries.

    Given a dictionary, the result is a copy without the keys given; given
    any other iterable, the result is a list with such copies of all its
    dictionary items, ignoring items of other types. Anything else results
    in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(kv, avoid):
        return {k: v for k, v in kv.items() if k not in avoid}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        avoid = set(what)
        return [kdrop(e, avoid) for e in src if isinstance(e, dict)]

    return None


dropped = drop


def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    # whichever argument is a string acts as the separator
    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))


def maybe(f, x):
    'Call the func given with the value given; on failure, keep the value.'

    try:
        return f(x)
    except Exception:
        return x


def number(x):
    'Try to turn a value into an int, then a float; on failure, keep it.'

    try:
        return int(x)
    except Exception:
        pass
    try:
        return float(x)
    except Exception:
        return x


def pick(src, *keys):
    '''
    Keep only the keys given, using None for the missing ones.

    Given a dictionary, the result is a dictionary; given any other
    iterable, the result is a list made from all its dictionary items,
    ignoring items of other types.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [
        {k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)
    ]


picked = pick


def rescue(attempt, fallback = None):
    '''
    Call a 0-input func, using a fallback if that fails.

    A callable fallback is called with the exception caught, and its result
    is used. Broken pipes are never rescued, so the script can still quit
    when its output is closed.
    '''

    try:
        return attempt()
    except BrokenPipeError:
        raise
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback


rescued = rescue


def retype(x):
    'Try to narrow the type of the value given.'

    if isinstance(x, float):
        # infinities and NaN aren't integers: calling int on those fails,
        # so these are returned as given
        return int(x) if x.is_integer() else x

    if not isinstance(x, str):
        return x

    # try the most specific parsers first, keeping the string when all fail
    for parse in (loads, int, float):
        try:
            return parse(x)
        except Exception:
            pass

    return x


autocast = autocasted = mold = molded = recast = recasted = remold = retype
remolded = retyped = retype


# JSON-style type names, looked up using the exact type of a value
_json_type_names = {
    type(None): 'null',
    bool: 'boolean',
    dict: 'object',
    float: 'number',
    int: 'number',
    str: 'string',
    list: 'array',
    tuple: 'array',
}


def typeof(x):
    'Name the JSON-style type of a value, using `other` as the fallback.'
    return _json_type_names.get(type(x), 'other')


jstype = typeof


def result_needs_fixing(x):
    '''
    Check if a result has anything func fix_result would need to change.

    NaN numbers need fixing, since the JSON output is written with NaN
    disallowed; so does anything which isn't null, a boolean, a number, a
    string, or a dict/list/tuple made only from those.
    '''

    if isinstance(x, float):
        return isnan(x)
    if x is None or isinstance(x, (bool, int, str)):
        return False
    rec = result_needs_fixing
    if isinstance(x, dict):
        return any(rec(k) or rec(v) for k, v in x.items())
    if isinstance(x, (list, tuple)):
        return any(rec(e) for e in x)
    return True


def fix_result(x, default):
    '''
    Recursively turn a result into a value which can be output as JSON.

    - the builtin `type` becomes the type-name of the original input
    - funcs are called with the original input, and their results used
    - NaN becomes null
    - Skip values are left out of their containers, or become null
    - dot-accessible objects become dictionaries
    - other iterables become tuples
    - exceptions are raised
    - anything else is turned into a string
    '''

    if x is type:
        return type(default).__name__

    # if expression results in a func, auto-call it with the original data
    if callable(x):
        x = x(default)

    if isinstance(x, float) and isnan(x):
        return None

    if x is None or isinstance(x, (bool, int, float, str)):
        return x

    rec = fix_result

    if isinstance(x, dict):
        return {
            rec(k, default): rec(v, default) for k, v in x.items() if not
            (isinstance(k, Skip) or isinstance(v, Skip))
        }

    # this check must come before the one for iterables: dottables define
    # __iter__, so they would otherwise turn into tuples of their keys
    if isinstance(x, Dottable):
        return rec(x.__dict__, default)

    if isinstance(x, Iterable):
        return tuple(rec(e, default) for e in x if not isinstance(e, Skip))

    if isinstance(x, Exception):
        raise x

    return None if isinstance(x, Skip) else str(x)


def fail(msg, code = 1):
    'Show an error message on stderr, then quit with the exit code given.'

    print(str(msg), file=stderr)
    exit(code)


def message(msg, result = None):
    'Show a message on stderr, handing back the 2nd argument as the result.'

    print(msg, file=stderr)
    return result


msg = message


def seemsurl(path):
    'Check if a path starts with any of the URI protocols supported.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


def matchkey(kv, key):
    '''
    Find the dictionary key which best matches the string given.

    Matches are tried in order: an exact match, a case-insensitive match,
    and finally the string as an integer position among the keys, where
    negative positions count from the end. When all fail, the result is the
    string given, unchanged.
    '''

    if key in kv:
        return key

    low = key.lower()
    for k in kv.keys():
        if low == k.lower():
            return k

    try:
        i = int(key)
    except (TypeError, ValueError):
        return key

    if i < 0:
        i += len(kv)
    if 0 <= i < len(kv):
        return next(islice(kv, i, None))
    return key


def zoom(data, keys):
    '''
    Follow a sequence of string keys into nested JSON-like data.

    Dictionary keys are matched using func matchkey, with missing keys
    giving None. On lists/tuples, keys are integer indices, with invalid or
    out-of-range ones giving None, except for 3 special keys which use up
    all keys after them:

        +   keep only the following keys from each dictionary item
        -   drop the following keys from each dictionary item
        .   zoom into each item using the following keys

    Zooming into any other type of value raises an exception.
    '''

    for i, k in enumerate(keys):
        if isinstance(data, dict):
            data = data.get(matchkey(data, k), None)
            continue

        if isinstance(data, (list, tuple)):
            rest = keys[i + 1:]
            if k == '+':
                return [{name: e.get(name, None) for name in rest}
                    for e in data if isinstance(e, dict)]
            if k == '-':
                avoid = set(rest)
                return [{name: v for name, v in e.items() if name not in avoid}
                    for e in data if isinstance(e, dict)]
            if k == '.':
                return [zoom(e, rest) for e in data]

            # non-integer keys are deliberately lenient, giving null
            try:
                k = int(k)
                l = len(data)
                data = data[k] if -l <= k < l else None
            except Exception:
                data = None
            continue

        raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')

    return data


def make_eval_once(run):
    '''
    Wrap an evaluator func so the global name `eval` is disabled on use.

    The func returned sets the global `eval` to None right before running
    the expression given, so the expression itself can't use that name.
    '''

    def eval_once(expr):
        global eval
        eval = None
        return run(expr)
    return eval_once


# handy values for expressions, since quoting these on a shell is awkward
cr = '\r'
crlf = '\r\n'
dquo = dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
squo = squote = '\''

nil = none = null = None


no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')


# Everything below only runs when this file is used as a script. It's kept
# at module level, instead of inside a main func, because expressions are
# evaluated against the module globals: that's how they get to use all the
# funcs/values above, the input variables, and any modules asked for.
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)

    args = argv[1:]
    no_input = False
    zoom_stdin = False
    pipe_mode = False
    trace_errors = False
    dottable_input = False
    compact_output = False

    # handle all leading options
    while len(args) > 0:
        if args[0] == '--':
            args = args[1:]
            break

        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in compact_output_opts:
            compact_output = True
            args = args[1:]
            continue

        if args[0] in dot_opts:
            dottable_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            # all remaining arguments are the pipeline steps
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    # not named `msg`, to avoid replacing the func alias
                    wanted = 'a module name or a comma-separated list of modules'
                    raise Exception('expected ' + wanted)

                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)

                # keep these helpers out of reach from expressions
                g = None
                import_module = None
                args = args[2:]
            except Exception as e:
                fail(e, 1)

            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        if args[0] in zoom_opts:
            # all remaining arguments are the keys to zoom with
            zoom_stdin = True
            args = args[1:]
            break

        break

    try:
        if zoom_stdin:
            data = load(stdin)
            data = zoom(data, args)
            # the zoomed data are the result: without these names bound,
            # the output steps below would fail with a NameError
            v = value = data
        else:
            expr = 'data'
            if len(args) > 0 and (not pipe_mode):
                expr = args[0]
                args = args[1:]

            if expr == '.':
                expr = 'data'
            if not pipe_mode:
                expr = compile(expr, expr, mode='eval')

            if (not pipe_mode) and len(args) > 1:
                raise Exception('can\'t use more than 1 input')
            path = '-' if len(args) == 0 or pipe_mode else args[0]

            if no_input:
                data = None
            elif path == '-':
                data = load(stdin)
            elif seemsurl(path):
                from io import TextIOWrapper
                from urllib.request import urlopen
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        data = load(txt)
            else:
                with open(path, encoding='utf-8') as inp:
                    data = load(inp)

            if dottable_input:
                data = dotate(data)

            v = val = value = d = dat = data

            # NOTE(security): running expressions given by the user is the
            # whole point of this tool; hiding these names isn't a sandbox,
            # so expressions from untrusted sources must never be used
            exec = None
            open = None
            compile = None

            if pipe_mode:
                funcs = [eval(s) for s in args]
                eval = None

                # variable names `o` and `p` work like in the `pyp` tool,
                # except the pipeline steps were given as separate cmd-line
                # arguments
                o = p = prev = v
                for f in funcs:
                    p = f(p)
                    # steps resulting in funcs are called on the previous
                    # value
                    if callable(p):
                        p = p(prev)
                    prev = p
                v = p
            else:
                eval = make_eval_once(eval)
                v = eval(expr)

        if result_needs_fixing(v):
            v = fix_result(v, value)

        if compact_output:
            dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
        else:
            dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
        print()
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise
        else:
            fail(e, 1)