#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len
'''


from itertools import islice
from json import dump, load, loads
# keep the builtin reachable, since func compile below is about regexes
compile_py = compile
from math import isnan
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin, stdout
from typing import Iterable


# the script parts are guarded, so importing this file has no side-effects;
# the guarded code still runs at module level, so the expression evaluated
# later keeps seeing all the names defined in this file
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)


class Skip:
    'Marker type: func fix_result leaves out items/entries of this type.'


skip = Skip()


class Dottable:
    'Enable convenient dot-syntax access to dictionary values.'

    def __getattr__(self, key):
        # only called when the normal attribute lookup fails: missing keys
        # result in None, instead of raising an exception
        return self.__dict__.get(key)

    def __getitem__(self, key):
        return self.__dict__.get(key)

    def __iter__(self):
        return iter(self.__dict__)


def dotate(x):
    'Recursively ensure all dictionaries in a value are dot-accessible.'

    if isinstance(x, dict):
        d = Dottable()
        d.__dict__ = {k: dotate(v) for k, v in x.items()}
        return d
    if isinstance(x, list):
        return [dotate(e) for e in x]
    if isinstance(x, tuple):
        return tuple(dotate(e) for e in x)
    return x

dotated = dotate
dote = dotate
doted = dotate
dotified = dotate
dotify = dotate
dottified = dotate
dottify = dotate


def chunk(items, chunk_size):
    '''
    Break iterable into chunks, each with up to the item-count given.

    Strings are split into substrings, while all other iterables are split
    into tuples. This is a generator: an exception about an invalid chunk
    size is raised when the first chunk is asked for.
    '''

    # check the size before anything else: a chunk-size of 0 would otherwise
    # make the string branch loop forever
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk


# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# ire_cache is like re_cache, except it's for case-insensitive regexes
ire_cache = {}


def compile(expr, flags=0):
    '''
    Speed-up using regexes across lines, by avoiding recompilations.

    The only flags supported are 0 (the default) and IGNORECASE: any other
    value raises an exception.
    '''

    if flags not in (0, IGNORECASE):
        msg = 'only the default and case-insensitive options are supported'
        raise Exception(msg)

    cache = re_cache if flags == 0 else ire_cache
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat


def icompile(expr):
    'Case-insensitive variant of func compile.'
    return compile(expr, IGNORECASE)


def cond(*args):
    '''
    Pick the value right after the first truthy condition.

    Arguments are condition/value pairs: when no condition is truthy, the
    result is the trailing unpaired argument, or None if there isn't one.
    '''

    for i in range(0, len(args) - 1, 2):
        if args[i]:
            return args[i + 1]
    return args[-1] if len(args) % 2 == 1 else None


def dive(into, using):
    '''
    Depth-first recursive caller for 1-input functions.

    The 2 arguments can be given in either order. Dictionaries keep their
    keys, other non-string iterables become lists, and all other values are
    replaced by the results of calling the func with them.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)


def divekeys(into, using):
    '''
    Depth-first recursive renamer of dictionary keys.

    The 2 arguments can be given in either order. The func given is called
    with each key, and its result is used as the new key; non-string
    iterables become lists, and all other values are kept as they are.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    # func rec only takes the value to transform
    return rec(into)


def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions.

    With 2 arguments, these can be given in either order: the func is called
    with the key/index and value of each non-container value, and its result
    replaces that value.

    With 3 arguments, the data can come either first or last: the first func
    is called with each dictionary key and its value to make the new key,
    which is turned into a string, while the second func makes the new values
    out of keys/indices and non-container values.
    '''

    if using2 is None:
        if callable(into):
            into, using = using, into
    elif not callable(using2):
        # the data came last, after both funcs
        into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {key: rec(key, e) for key, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(i, e) for i, e in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {
                str(using(key, e)): rec2(key, e) for key, e in v.items()
            }
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(i, e) for i, e in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv


def drop(src, *what):
    '''
    Remove things from the value given.

    Strings lose all occurrences of the substrings given, dictionaries lose
    the keys given, and other iterables result in a list of all the
    dictionaries in them, each without the keys given. Any other type of
    value results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    avoid = set(what)

    def kdrop(kv):
        return {k: v for k, v in kv.items() if k not in avoid}

    if isinstance(src, dict):
        return kdrop(src)
    if isinstance(src, Iterable):
        return [kdrop(e) for e in src if isinstance(e, dict)]
    return None

dropped = drop


def join(x, y = ' '):
    '''
    Join values into a string, or make a dict from keys and values.

    When either argument is a string, it's the separator used to join the
    items of the other argument: otherwise, the first argument has the keys
    and the second one has the values for the resulting dictionary.
    '''

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))


def pick(src, *keys):
    '''
    Keep only the keys given, using None for any missing ones.

    Dictionaries result in a dictionary, while anything else is looped over,
    resulting in a list with a dictionary for each dictionary found.
    '''

    def kpick(kv):
        return {k: kv.get(k, None) for k in keys}

    if isinstance(src, dict):
        return kpick(src)
    return [kpick(e) for e in src if isinstance(e, dict)]

picked = pick


def rescue(attempt, fallback = None):
    '''
    Call the func given, using a fallback when exceptions happen.

    A callable fallback is called with the exception, and its result is
    used: any other fallback is the result as given.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
catched = rescue
caught = rescue
recover = rescue
recovered = rescue
rescued = rescue


def retype(x):
    '''
    Try to narrow the type of the value given.

    Floats with integer values become integers. Strings are tried as JSON,
    then as integers, then as floats, and are kept as given when all of
    those fail. Any other value is returned as given.
    '''

    if isinstance(x, float):
        try:
            n = int(x)
        except (OverflowError, ValueError):
            # infinities and NaNs can't turn into integers
            return x
        return n if float(n) == x else x

    if not isinstance(x, str):
        return x

    for convert in (loads, int, float):
        try:
            return convert(x)
        except Exception:
            # deliberately best-effort: just try the next conversion
            continue

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype


def typeof(x):
    'Name the type of a value JSON-style, using `other` as a last resort.'

    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof


def result_needs_fixing(x):
    '''
    Check if a value needs func fix_result before being emitted as JSON.

    NaN floats need fixing, since the output is emitted with allow_nan=False,
    and so does anything which isn't None, a boolean, a number, a string,
    or a dict/list/tuple which only has such values, all the way down.
    '''

    if isinstance(x, float):
        return isnan(x)
    if x is None or isinstance(x, (bool, int, str)):
        return False

    rec = result_needs_fixing
    if isinstance(x, dict):
        return any(rec(k) or rec(v) for k, v in x.items())
    if isinstance(x, (list, tuple)):
        return any(rec(e) for e in x)
    return True


def fix_result(x, default):
    '''
    Turn an expression result into a JSON-friendly value.

    The `default` is the original input: funcs are called with it, and the
    builtin `type` results in the name of its type. NaNs become None, values
    of type Skip are left out of their containers, dot-accessible values
    become dictionaries, other iterables become tuples, and exceptions are
    raised. Anything else not already JSON-friendly is turned into a string.
    '''

    if x is type:
        return type(default).__name__

    # if expression results in a func, auto-call it with the original data
    if callable(x):
        x = x(default)

    if isinstance(x, float) and isnan(x):
        return None

    if x is None or isinstance(x, (bool, int, float, str)):
        return x

    rec = fix_result

    if isinstance(x, dict):
        return {
            rec(k, default): rec(v, default) for k, v in x.items() if not
            (isinstance(k, Skip) or isinstance(v, Skip))
        }

    # this check must come before the one for iterables: type Dottable has
    # an __iter__ method, so its values would otherwise turn into a tuple
    # of just their keys
    if isinstance(x, Dottable):
        return rec(x.__dict__, default)

    if isinstance(x, Iterable):
        return tuple(rec(e, default) for e in x if not isinstance(e, Skip))

    if isinstance(x, Exception):
        raise x

    return None if isinstance(x, Skip) else str(x)


def fail(msg, code = 1):
    'Show the message given in red on stderr, then quit with the code given.'

    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)


def message(msg, result = None):
    'Show a message on stderr, then return the second argument.'

    print(msg, file=stderr)
    return result

msg = message


def seemsurl(path):
    'Check if a path starts with any of the URI protocols supported.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


def matchkey(kv, key):
    '''
    Find the dictionary key which best matches the string given.

    Exact matches are tried first, then case-insensitive ones, and finally
    the string is tried as an integer index into the keys, where negative
    indices count from the end. The string is returned as given when
    nothing matches.
    '''

    if key in kv:
        return key

    low = key.lower()
    for k in kv.keys():
        if low == k.lower():
            return k

    try:
        i = int(key)
    except (TypeError, ValueError):
        return key

    n = len(kv)
    if i < 0:
        i += n
    if 0 <= i < n:
        return next(islice(kv.keys(), i, None))
    return key


def zoom(data, keys):
    '''
    Follow a sequence of string keys/indices into nested JSON-like data.

    Dictionary keys are matched using func matchkey, with None as the result
    for missing ones. For lists/tuples, integer keys are indices, where
    invalid or out-of-range ones result in None, while 3 special keys end
    the zooming early:

        +   picks all keys after it from each dictionary in the list
        -   drops all keys after it from each dictionary in the list
        .   zooms each item in the list using all keys after it

    An exception is raised when there are keys left for a value which is
    neither a dictionary nor a list/tuple.
    '''

    for i, k in enumerate(keys):
        if isinstance(data, dict):
            data = data.get(matchkey(data, k), None)
            continue

        if isinstance(data, (list, tuple)):
            rest = keys[i + 1:]

            if k == '+':
                return [
                    {name: e.get(name, None) for name in rest}
                    for e in data if isinstance(e, dict)
                ]
            if k == '-':
                avoid = set(rest)
                return [
                    {name: v for name, v in e.items() if name not in avoid}
                    for e in data if isinstance(e, dict)
                ]
            if k == '.':
                return [zoom(e, rest) for e in data]

            try:
                k = int(k)
                l = len(data)
                data = data[k] if -l <= k < l else None
            except Exception:
                # non-integer keys can't index arrays
                data = None
            continue

        raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')

    return data


def make_eval_once(run):
    '''
    Wrap an eval-like func so the global name `eval` is set to None right
    before the wrapped func runs: expressions can't then use that name.
    '''

    def eval_once(expr):
        global eval
        eval = None
        return run(expr)
    return eval_once

eval = make_eval_once(eval)


# handy names for expressions to use
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

nil = None
none = None
null = None


no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')


if __name__ == '__main__':
    args = argv[1:]
    no_input = False
    zoom_stdin = False
    trace_errors = False
    dottable_input = False
    compact_output = False

    # handle all leading options
    while len(args) > 0:
        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in compact_output_opts:
            compact_output = True
            args = args[1:]
            continue

        if args[0] in dot_opts:
            dottable_input = True
            args = args[1:]
            continue

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    # the message is used directly, since name `msg` is
                    # also one of the funcs available to expressions
                    raise Exception(
                        'expected a module name or a comma-separated ' +
                        'list of modules'
                    )

                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)

                g = None
                import_module = None
                args = args[2:]
            except Exception as e:
                fail(e, 1)

            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        if args[0] in zoom_opts:
            # all remaining args are the keys to zoom with
            zoom_stdin = True
            args = args[1:]
            break

        break

    try:
        if zoom_stdin:
            data = load(stdin)
            result = zoom(data, args)
        else:
            expr = 'data'
            if len(args) > 0:
                expr = args[0]
                args = args[1:]

            if expr == '.':
                expr = 'data'
            expr = compile_py(expr, expr, mode='eval')

            if len(args) > 1:
                raise Exception('can\'t use more than 1 input')
            path = '-' if len(args) == 0 else args[0]

            if no_input:
                data = None
            elif path == '-':
                data = load(stdin)
            elif seemsurl(path):
                from io import TextIOWrapper
                from urllib.request import urlopen
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        data = load(txt)
            else:
                with open(path, encoding='utf-8') as inp:
                    data = load(inp)

            if dottable_input:
                data = dotate(data)

            v = val = value = d = dat = data
            # hide some of the riskier names from the expression
            compile_py = None
            exec = None
            open = None
            # NOTE: the expression comes from the command line, and is
            # evaluated with full access to this file's global names
            # the result has its own name, so that `data` keeps being the
            # decoded input, which func fix_result needs to auto-call funcs
            result = eval(expr)

        if result_needs_fixing(result):
            result = fix_result(result, data)

        if compact_output:
            dump(
                result, stdout, indent=None, separators=(',', ':'),
                allow_nan=False,
            )
        else:
            dump(
                result, stdout, indent=2, separators=(',', ': '),
                allow_nan=False,
            )
        print()
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise
        fail(e, 1)