#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'
'''


from itertools import islice
from json import dump, load, loads
from math import isnan
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin, stdout
from typing import Iterable


# the help checks only make sense when running as a script: guarding them
# keeps the helper functions below importable without side-effects
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)


class Skip:
    'Marker type: values of this type are left out of the final result.'
    pass

skip = Skip()

class Dottable:
    'Enable convenient dot-syntax access to dictionary values.'

    def __getattr__(self, key):
        # only called when normal attribute lookup fails: missing keys
        # result in None, instead of raising an AttributeError
        return self.__dict__[key] if key in self.__dict__ else None

    def __getitem__(self, key):
        return self.__dict__[key] if key in self.__dict__ else None

    def __iter__(self):
        return iter(self.__dict__)

def dotate(x):
    'Recursively ensure all dictionaries in a value are dot-accessible.'

    if isinstance(x, dict):
        d = Dottable()
        d.__dict__ = {k: dotate(v) for k, v in x.items()}
        return d
    if isinstance(x, list):
        return [dotate(e) for e in x]
    if isinstance(x, tuple):
        return tuple(dotate(e) for e in x)
    return x

dotated = dotate
dote = dotate
doted = dotate
dotified = dotate
dotify = dotate
dottified = dotate
dottify = dotate

def chunk(items, chunk_size):
    'Break iterable into chunks, each with up to the item-count given.'

    # validate the chunk-size first, even for strings: a non-positive size
    # would otherwise loop forever while yielding empty strings
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    # strings are chunked into substrings, instead of tuples of characters
    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk

# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

def re_compile(expr, flags = 0):
    'Speed-up using regexes, by avoiding recompilations.'

    # patterns are cached separately for each combination of flags
    cache = re_cache.setdefault(flags, {})
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat

def icompile(expr):
    'Compile (and cache) a case-insensitive regex.'
    return re_compile(expr, IGNORECASE)

def cond(*args):
    '''
    Pick the value following the first truthy condition, args being given
    as condition/value pairs; a trailing unpaired value is the fallback,
    and None is the result when nothing else applies.
    '''

    for i in range(0, len(args) - 1, 2):
        if args[i]:
            return args[i + 1]

    return args[-1] if len(args) % 2 == 1 else None

def dive(into, using):
    'Depth-first recursive caller for 1-input functions.'

    # allow giving the func and the data in either order
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)

def divekeys(into, using):
    'Depth-first recursive caller for 1-input funcs which rename dict keys.'

    # allow giving the func and the data in either order
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    # rec takes only the value: calling it with 2 args always failed
    return rec(into)

def divekv(into, using, using2 = None):
    'Depth-first recursive caller for 2-input functions.'

    # allow giving the data either first or last among the arguments
    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        # transform leaf values, given their key/index and the values
        if isinstance(v, dict):
            return {k: rec(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(i, e) for i, e in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        # rename dict keys with the 1st func, transform leaves with the 2nd
        if isinstance(v, dict):
            return {str(using(k, e)): rec2(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(i, e) for i, e in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv

def drop(src, *what):
    '''
    Remove substrings from a string, keys from a dict, or keys from all
    dicts in an iterable, ignoring its non-dict items; anything else
    results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for (k, v) in src.items() if not (k in what)}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src if isinstance(e, dict)]

    return None

dropped = drop

def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    # the separator can be given either first or last
    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return {k: v for k, v in zip(x, y)}

def pick(src, *keys):
    '''
    Keep only the keys given from a dict, or from all dicts in an iterable,
    ignoring its non-dict items; missing keys get None values.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]

picked = pick

def rescue(attempt, fallback = None):
    '''
    Call the func given, using the fallback when an exception happens: a
    callable fallback is called with the exception to get the result.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
recover = rescue
rescued = rescue

def retype(x):
    'Try to narrow the type of the value given.'

    if isinstance(x, float):
        try:
            n = int(x)
        except (OverflowError, ValueError):
            # infinities and NaN can't turn into integers: keep them as is
            return x
        return n if float(n) == x else x

    if not isinstance(x, str):
        return x

    # strings are tried as JSON first, then as integers, then as floats
    try:
        return loads(x)
    except Exception:
        pass

    try:
        return int(x)
    except Exception:
        pass

    try:
        return float(x)
    except Exception:
        pass

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype

def typeof(x):
    'Get the JSON-style type-name for a value; unknown types are `other`.'

    # return str(type(x))
    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof


def result_needs_fixing(x):
    'Check if a result has anything func fix_result needs to handle.'

    if isinstance(x, float):
        # only NaN floats need fixing, since those aren't valid JSON
        return isnan(x)
    if x is None or isinstance(x, (bool, int, str)):
        return False
    rec = result_needs_fixing
    if isinstance(x, dict):
        return any(rec(k) or rec(v) for k, v in x.items())
    if isinstance(x, (list, tuple)):
        return any(rec(e) for e in x)
    return True

def fix_result(x, default):
    '''
    Recursively turn a result into a JSON-encodable value: funcs are called
    with the original input, NaNs become nulls, dot-accessible objects turn
    back into dicts, other iterables turn into tuples, skipped values are
    dropped from their containers, and exceptions are raised.
    '''

    if x is type:
        return type(default).__name__

    # if expression results in a func, auto-call it with the original data
    if callable(x):
        x = x(default)

    if isinstance(x, float) and isnan(x):
        return None

    if x is None or isinstance(x, (bool, int, float, str)):
        return x

    rec = fix_result

    if isinstance(x, dict):
        return {
            rec(k, default): rec(v, default) for k, v in x.items() if not
            (isinstance(k, Skip) or isinstance(v, Skip))
        }

    # this check must come before the one for iterables, since dottables
    # are iterable, and would otherwise turn into tuples of their keys
    if isinstance(x, Dottable):
        return rec(x.__dict__, default)

    if isinstance(x, Iterable):
        return tuple(rec(e, default) for e in x if not isinstance(e, Skip))

    if isinstance(x, Exception):
        raise x

    return None if isinstance(x, Skip) else str(x)

def fail(msg, code = 1):
    'Show the error message given in red on stderr, then quit the app.'
    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)

def message(msg, result = None):
    'Show a message on stderr, returning the 2nd argument as the result.'
    print(msg, file=stderr)
    return result

msg = message

def seemsurl(path):
    'Check if the string given starts with a supported URI protocol.'
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)

def matchkey(kv, key):
    '''
    Find which key in a dict best matches the string given: exact matches
    are tried first, then case-insensitive ones, then the string is tried
    as a (possibly negative) index into the dict's keys. The string given
    is returned as is when nothing matches.
    '''

    if key in kv:
        return key

    low = key.lower()
    for k in kv.keys():
        if low == k.lower():
            return k

    try:
        i = int(key)
    except Exception:
        return key

    l = len(kv)
    if i < 0:
        i += l
    if not (0 <= i < l):
        return key

    for j, k in enumerate(kv.keys()):
        if i == j:
            return k
    return key

def zoom(data, keys):
    '''
    Follow a sequence of string keys/indices into nested data. While on
    arrays, special key `+` picks the remaining keys from all objects in
    the array, `-` drops the remaining keys from them, and `.` zooms each
    array item using the remaining keys. Missing keys and invalid indices
    result in null; trying to zoom into non-container values raises an
    exception.
    '''

    for i, k in enumerate(keys):
        if isinstance(data, dict):
            # m = matchkey(data, k)
            # if not (m in data):
            #     raise Exception(f'{m}: object doesn\'t have that key')
            data = data.get(matchkey(data, k), None)
            continue

        if isinstance(data, (list, tuple)):
            if k == '+':
                pick = keys[i + 1:]
                return [{name: e.get(name, None) for name in pick}
                    for e in data if isinstance(e, dict)]
            if k == '-':
                avoid = set(keys[i + 1:])
                return [{name: v for (name, v) in e.items()
                    if not (name in avoid)}
                    for e in data if isinstance(e, dict)]
            if k == '.':
                rest = keys[i + 1:]
                return [zoom(e, rest) for e in data]

            try:
                k = int(k)
                l = len(data)
                data = data[k] if -l <= k < l else None
            except Exception:
                # raise Exception(f'{k}: arrays don\'t have keys like objects')
                data = None
            continue

        # return None
        # data = None
        raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')

    return data

def make_eval_once(run):
    'Wrap an eval-like func so global name `eval` is disabled on first use.'

    def eval_once(expr):
        global eval
        # prevent the expression itself from evaluating arbitrary strings
        eval = None
        return run(expr)
    return eval_once


# convenience names for the expressions given to this tool
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

nil = None
none = None
null = None


no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')

args = argv[1:]
no_input = False
zoom_stdin = False
pipe_mode = False
trace_errors = False
dottable_input = False
compact_output = False


# everything below deliberately runs at module level, even if guarded, so
# all names defined above/below are available to the expressions evaluated
if __name__ == '__main__':
    # handle leading options: pipe and zoom modes take all remaining args
    while len(args) > 0:
        if args[0] == '--':
            args = args[1:]
            break

        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in compact_output_opts:
            compact_output = True
            args = args[1:]
            continue

        if args[0] in dot_opts:
            dottable_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    # no temporary is used for the message, so the global
                    # `msg` alias for func message stays intact
                    raise Exception(
                        'expected a module name or a comma-separated '
                        'list of modules'
                    )

                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)

                g = None
                import_module = None
                args = args[2:]
            except Exception as e:
                fail(e, 1)

            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        if args[0] in zoom_opts:
            zoom_stdin = True
            args = args[1:]
            break

        break


    try:
        if zoom_stdin:
            data = load(stdin)
            data = zoom(data, args)
            # zoom mode takes no expression: the zoomed value is the result
            expr = 'data'
        else:
            expr = 'data'
            if len(args) > 0 and (not pipe_mode):
                expr = args[0]
                args = args[1:]

            if expr == '.':
                expr = 'data'
            if not pipe_mode:
                expr = compile(expr, expr, mode='eval')

            if (not pipe_mode) and len(args) > 1:
                raise Exception('can\'t use more than 1 input')
            path = '-' if len(args) == 0 or pipe_mode else args[0]

            if no_input:
                data = None
            elif path == '-':
                data = load(stdin)
            elif seemsurl(path):
                from io import TextIOWrapper
                from urllib.request import urlopen
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        data = load(txt)
            else:
                with open(path, encoding='utf-8') as inp:
                    data = load(inp)

            if dottable_input:
                data = dotate(data)

        v = val = value = d = dat = data
        # hide some of the riskier built-in funcs from the expressions
        exec = None
        open = None
        compile = None

        if pipe_mode:
            # NOTE: all pipeline steps are evaluated as python expressions
            funcs = [eval(s) for s in args]
            eval = None

            # variable names `o` and `p` work like in the `pyp` tool, except
            # the pipeline steps were given as separate cmd-line arguments
            o = p = prev = v
            for f in funcs:
                p = f(p)
                # func results are auto-called with the previous value
                if callable(p):
                    p = p(prev)
                prev = p
            v = p
        else:
            # NOTE: the expression given is evaluated as python code
            eval = make_eval_once(eval)
            v = eval(expr)

        if result_needs_fixing(v):
            v = fix_result(v, value)

        if compact_output:
            dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
        else:
            dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
        print()
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise e
        else:
            fail(e, 1)