#!/usr/bin/python

# The MIT License (MIT)
#
# Copyright © 2026 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'

# try to auto-parse values (esp. numbers) from a table of string values
echo '[{"key": "abc", "val": "123"}, {"key": "xyz", "val": "no"}]' | \\
tjp '[{k: rescue(lambda: loads(v), v) for k, v in e.items()} for e in v]'
'''


from itertools import islice
from json import dump, load, loads
from math import isnan
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin, stdout
from typing import Iterable


class Skip:
    'Marker type: values of this type are left out of the final output.'
    pass


# skip is the ready-made marker value expressions can use
skip = Skip()


class Dottable:
    'Enable convenient dot-syntax access to dictionary values.'

    def __getattr__(self, key):
        # only reached when normal attribute lookup fails: missing keys
        # give None, instead of raising an AttributeError
        return self.__dict__.get(key)

    def __getitem__(self, key):
        return self.__dict__.get(key)

    def __iter__(self):
        return iter(self.__dict__)


def dotate(x):
    'Recursively ensure all dictionaries in a value are dot-accessible.'

    if isinstance(x, dict):
        d = Dottable()
        d.__dict__ = {k: dotate(v) for k, v in x.items()}
        return d
    if isinstance(x, list):
        return [dotate(e) for e in x]
    if isinstance(x, tuple):
        return tuple(dotate(e) for e in x)
    return x


dotated = dote = doted = dotified = dotify = dottified = dottify = dotate


def chunk(items, chunk_size):
    '''
    Break iterable into chunks, each with up to the item-count given.

    Strings are chunked into substrings, anything else into tuples. This
    is a generator, so an invalid chunk-size raises an exception only when
    the first chunk is requested.
    '''

    # check the chunk-size before anything else: a non-positive size used
    # on a string would otherwise never stop yielding empty chunks
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head


chunked = chunk

# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}


def re_compile(expr, flags = 0):
    'Speed-up using regexes, by avoiding recompilations.'

    # the cache has 2 levels: first by flags, then by expression
    cache = re_cache.setdefault(flags, {})
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat


def icompile(expr):
    'Compile (and cache) a case-insensitive regular expression.'
    return re_compile(expr, IGNORECASE)


def cond(*args):
    '''
    Pick the first result whose condition holds, out of the alternating
    condition/result arguments given; a trailing unpaired argument is the
    fallback result, which is None when not given.
    '''

    for test, result in zip(args[0::2], args[1::2]):
        if test:
            return result
    return args[-1] if len(args) % 2 == 1 else None


def dive(into, using):
    '''
    Depth-first recursive caller for 1-input functions.

    The function is called on all non-container values, keeping the shape
    of dictionaries, while all other non-string iterables become lists.
    Arguments can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)


def divekeys(into, using):
    '''
    Depth-first recursive caller for 1-input funcs which rename dict keys.

    Values are kept as given, except non-string iterables become lists.
    Arguments can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    # rec takes only the value: it used to be called with 2 arguments,
    # which made all uses of this func fail
    return rec(into)


def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions.

    With 1 function, it's called with the key (or index) and value of each
    non-container value. With 2 functions, the first one renames dictionary
    keys (results are turned into strings), and the second one transforms
    non-container values. The data can be given either first or last.
    '''

    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            # data was given last, after both funcs
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {key: rec(key, val) for key, val in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(i, e) for i, e in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {
                str(using(key, val)): rec2(key, val) for key, val in v.items()
            }
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(i, e) for i, e in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)


kvdive = divekv


def drop(src, *what):
    '''
    Remove substrings from a string, keys from a dictionary, or keys from
    all dictionaries in an iterable, ignoring its non-dictionary items;
    any other kind of value results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(kv, avoid):
        return {k: v for k, v in kv.items() if k not in avoid}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        avoid = set(what)
        return [kdrop(e, avoid) for e in src if isinstance(e, dict)]

    return None


dropped = drop


def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))


def pick(src, *keys):
    '''
    Keep only the keys given from a dictionary, or from all dictionaries
    in an iterable, ignoring its non-dictionary items; missing keys get
    None as their value.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [
        {k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)
    ]


picked = pick


def rescue(attempt, fallback = None):
    '''
    Call the func given, returning the fallback if an exception happens;
    fallbacks which are callable are called with the exception instead.
    '''

    try:
        return attempt()
    except BrokenPipeError:
        # broken pipes must reach the top-level handler, to quit quietly
        raise
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback


rescued = rescue


def retype(x):
    'Try to narrow the type of the value given.'

    if isinstance(x, float):
        # is_integer is false for NaN and the infinities, which can't be
        # turned into integers
        return int(x) if x.is_integer() else x

    if not isinstance(x, str):
        return x

    # deliberately best-effort: try parsers from the most general to the
    # most specific, keeping the string as given when they all fail
    for parse in (loads, int, float):
        try:
            return parse(x)
        except Exception:
            continue

    return x


autocast = autocasted = mold = molded = recast = recasted = remold = retype
remolded = retyped = retype


def typeof(x):
    'Name the type of a value the way JSON/JavaScript would.'

    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')


jstype = typeof


def result_needs_fixing(x):
    'Check if a value has anything in it which JSON output can\'t handle.'

    if isinstance(x, float):
        # NaN is the only float which func fix_result changes
        return isnan(x)
    if x is None or isinstance(x, (bool, int, str)):
        return False

    rec = result_needs_fixing
    if isinstance(x, dict):
        return any(rec(k) or rec(v) for k, v in x.items())
    if isinstance(x, (list, tuple)):
        return any(rec(e) for e in x)
    return True


def fix_result(x, default):
    '''
    Recursively turn a value into something JSON output can handle.

    Funcs are called with the default value given, NaN becomes None,
    dot-accessible values become dictionaries, other iterables become
    tuples, values of type Skip are left out of their containers,
    exceptions are raised, and anything else is turned into a string.
    '''

    if x is type:
        return type(default).__name__

    # if expression results in a func, auto-call it with the original data
    if callable(x):
        x = x(default)

    if isinstance(x, float) and isnan(x):
        return None

    if x is None or isinstance(x, (bool, int, float, str)):
        return x

    rec = fix_result

    # this check must come before the iterable one, since dottables are
    # iterable and would otherwise end up as tuples of their keys
    if isinstance(x, Dottable):
        return rec(x.__dict__, default)

    if isinstance(x, dict):
        return {
            rec(k, default): rec(v, default) for k, v in x.items() if not
            (isinstance(k, Skip) or isinstance(v, Skip))
        }

    if isinstance(x, Iterable):
        return tuple(rec(e, default) for e in x if not isinstance(e, Skip))

    if isinstance(x, Exception):
        raise x

    return None if isinstance(x, Skip) else str(x)


def fail(msg, code = 1):
    'Show a message on stderr, then quit the app with the exit-code given.'

    print(str(msg), file=stderr)
    exit(code)


def message(msg, result = None):
    'Show a message on stderr, then return the result given.'

    print(msg, file=stderr)
    return result


msg = message


def seemsurl(path):
    'Check if a path starts with any of the supported URI protocols.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


def matchkey(kv, key):
    '''
    Find the dictionary key which best matches the string given: exact
    matches come first, then case-insensitive ones, then keys by their
    position, which can be negative to count from the end. The key is
    returned as given when nothing matches.
    '''

    if key in kv:
        return key

    low = key.lower()
    for k in kv.keys():
        if low == k.lower():
            return k

    try:
        i = int(key)
    except Exception:
        return key

    if i < 0:
        i += len(kv)
    if not (0 <= i < len(kv)):
        return key
    return next(islice(kv.keys(), i, None), key)


def zoom(data, keys):
    '''
    Follow a sequence of string keys/indices into nested data.

    Dictionary keys are matched loosely, using func matchkey; missing keys
    and invalid array indices give None. For arrays, key `+` picks all the
    remaining keys from each object item, key `-` drops them instead, and
    key `.` zooms each item using the remaining keys. Zooming on any other
    type of value raises an exception.
    '''

    for i, k in enumerate(keys):
        if isinstance(data, dict):
            data = data.get(matchkey(data, k), None)
            continue

        if isinstance(data, (list, tuple)):
            rest = keys[i + 1:]
            if k == '+':
                return pick(data, *rest)
            if k == '-':
                return drop(data, *rest)
            if k == '.':
                return [zoom(e, rest) for e in data]

            try:
                k = int(k)
                l = len(data)
                data = data[k] if -l <= k < l else None
            except Exception:
                # arrays don't have keys like objects
                data = None
            continue

        raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')

    return data


def make_eval_once(run):
    '''
    Wrap an eval-like func so it only works once: global name `eval` is
    cleared right before running the expression, which is thus unable to
    use it.
    '''

    def eval_once(expr):
        global eval
        eval = None
        return run(expr)
    return eval_once


# handy names for values which are annoying to quote on the command-line
cr = '\r'
crlf = '\r\n'
dquo = dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
squo = squote = '\''

nil = none = null = None


no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')


# the command-line logic stays at module scope (not in a func) on purpose:
# expressions are evaluated against the module's global names, so everything
# defined here, including the input-data names, is available to them; the
# guard only keeps all that from running when this file is imported
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)

    args = argv[1:]
    no_input = False
    zoom_stdin = False
    pipe_mode = False
    trace_errors = False
    dottable_input = False
    compact_output = False

    while len(args) > 0:
        if args[0] == '--':
            args = args[1:]
            break

        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in compact_output_opts:
            compact_output = True
            args = args[1:]
            continue

        if args[0] in dot_opts:
            dottable_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            # all remaining args are pipeline steps
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            if len(args) < 2:
                # the message is given directly, since a variable for it
                # named `msg` would replace the func with the same name
                fail(
                    'expected a module name or a comma-separated list ' +
                    'of modules',
                    1,
                )

            try:
                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)
            except Exception as e:
                fail(e, 1)

            # keep these names out of reach of the expression
            g = None
            import_module = None
            args = args[2:]
            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        if args[0] in zoom_opts:
            # all remaining args are keys to zoom with
            zoom_stdin = True
            args = args[1:]
            break

        break

    try:
        if zoom_stdin:
            # zoom mode has no expression to run: the zoomed data are the
            # result, so the names the output step needs are set right here
            v = value = data = zoom(load(stdin), args)
        else:
            expr = 'data'
            if len(args) > 0 and (not pipe_mode):
                expr = args[0]
                args = args[1:]

            if expr == '.':
                expr = 'data'
            if not pipe_mode:
                expr = compile(expr, expr, mode='eval')

            if (not pipe_mode) and len(args) > 1:
                raise Exception('can\'t use more than 1 input')
            path = '-' if len(args) == 0 or pipe_mode else args[0]

            if no_input:
                data = None
            elif path == '-':
                data = load(stdin)
            elif seemsurl(path):
                from io import TextIOWrapper
                from urllib.request import urlopen
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        data = load(txt)
            else:
                with open(path, encoding='utf-8') as inp:
                    data = load(inp)

            if dottable_input:
                data = dotate(data)

            v = val = value = d = dat = data

            # NOTE: expressions come from the command line and are run with
            # eval; clearing these names is only a guard-rail against
            # accidents, not a sandbox for untrusted expressions
            exec = None
            open = None
            compile = None

            if pipe_mode:
                funcs = [eval(s) for s in args]
                eval = None

                # variable names `o` and `p` work like in the `pyp` tool,
                # except the pipeline steps were given as separate cmd-line
                # arguments; both are global names, being set at module scope
                o = p = prev = v
                for f in funcs:
                    p = f(p)
                    if callable(p):
                        p = p(prev)
                    prev = p
                v = p
            else:
                eval = make_eval_once(eval)
                v = eval(expr)

        if result_needs_fixing(v):
            v = fix_result(v, value)

        if compact_output:
            dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
        else:
            dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
        print()
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise
        fail(e, 1)