#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -dots 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -dots 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len

# an array of 10 random integers between 1 and 10
tjp -m random = '(random.randint(1, 10) for _ in range(10))'

# try to auto-parse values (esp. numbers) from a table of string values
echo '[{"key": "abc", "val": "123"}, {"key": "xyz", "val": "no"}]' | \\
tjp '[{k: rescue(lambda: loads(v), v) for k, v in e.items()} for e in v]'
'''


from itertools import islice
from json import dump, load, loads
from math import isnan
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin, stdout
from typing import Iterable


# only act as a command-line tool when run as a script: this keeps all the
# helper funcs below importable without reading arguments or standard input
if __name__ == '__main__':
    # no arguments at all: show the help on stderr
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    # explicit help option: show the help on stdout
    if argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)


class Skip:
    'Marker type: values of this type are left out of results by fix_result.'
    pass


# skip is a ready-to-use marker value for expressions
skip = Skip()


class Dottable:
    'Enable convenient dot-syntax access to dictionary values.'

    def __getattr__(self, key):
        # only called when normal attribute lookup fails: missing keys
        # result in None, instead of raising an AttributeError
        return self.__dict__.get(key)

    def __getitem__(self, key):
        # subscript-syntax also works, giving None for missing keys
        return self.__dict__.get(key)

    def __iter__(self):
        # loop over keys, the same way dictionaries do
        return iter(self.__dict__)


def dotate(x):
    'Recursively ensure all dictionaries in a value are dot-accessible.'

    if isinstance(x, dict):
        d = Dottable()
        d.__dict__ = {k: dotate(v) for k, v in x.items()}
        return d
    if isinstance(x, list):
        return [dotate(e) for e in x]
    if isinstance(x, tuple):
        return tuple(dotate(e) for e in x)
    return x


dotated = dote = doted = dotified = dotify = dottified = dottify = dotate


def chunk(items, chunk_size):
    '''
    Break iterable into chunks, each with up to the item-count given.

    Strings are chunked into substrings, anything else into tuples. Being a
    generator, invalid chunk-sizes raise only when iteration starts.
    '''

    # validate before either branch: a zero chunk-size would otherwise make
    # the string branch loop forever, yielding empty strings
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head


chunked = chunk


# re_cache is used by custom func re_compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas;
# it maps flags to dictionaries, which in turn map expressions to patterns
re_cache = {}


def re_compile(expr, flags = 0):
    'Speed-up using regexes, by avoiding recompilations.'

    cache = re_cache.setdefault(flags, {})
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat


def icompile(expr):
    'Compile (with caching) a case-insensitive regex.'
    return re_compile(expr, IGNORECASE)


def cond(*args):
    '''
    Pick the value paired with the first truthy condition.

    Arguments are (condition, value) pairs, optionally followed by a final
    fallback value: without a fallback, the result is None when no condition
    holds, or when no arguments are given.
    '''

    if len(args) == 0:
        return None

    for i, e in enumerate(args):
        # conditions sit at even indices, except for a trailing fallback
        if i % 2 == 0 and i < len(args) - 1 and e:
            return args[i + 1]

    return args[-1] if len(args) % 2 == 1 else None


def dive(into, using):
    '''
    Depth-first recursive caller for 1-input functions.

    The func given is called on all non-container values: dictionaries keep
    their keys, while other non-string iterables turn into lists. Data and
    func can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)


def divekeys(into, using):
    '''
    Depth-first recursive caller for 1-input funcs which rename dict keys.

    The func given is called on each dictionary key, at any depth; values
    which aren't containers are kept as they are, while non-string iterables
    turn into lists. Data and func can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    return rec(into)


def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions.

    With 1 func, it's called with the key/index and value of each value which
    isn't a container; the top-level value is paired with a None key. With 2
    funcs, the first one renames dictionary keys (its results are turned into
    strings), while the second one transforms the non-container values. The
    data can come either before or after the func(s).
    '''

    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        # the last argument not being a func means the data came last
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {k: rec(k, v) for k, v in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(i, v) for i, v in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(i, v) for i, v in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)


kvdive = divekv


def drop(src, *what):
    '''
    Remove things from a value.

    Strings have all the substrings given removed; dictionaries have the keys
    given removed; other iterables result in a list of their dictionaries,
    each without the keys given. Anything else results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for (k, v) in src.items() if not (k in what)}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src if isinstance(e, dict)]

    return None


dropped = drop


def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    # either argument can be the separator string
    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))


def pick(src, *keys):
    '''
    Keep only the keys given, using None for missing ones.

    Dictionaries result in a dictionary; anything else is looped over, and
    results in a list made from its dictionaries, ignoring other items.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]


picked = pick


def rescue(attempt, fallback = None):
    '''
    Call the func given, using a fallback when it raises an exception.

    Callable fallbacks are called with the exception, and their result is
    used; other fallbacks are used as given.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback


rescued = rescue


def retype(x):
    'Try to narrow the type of the value given.'

    if isinstance(x, float):
        # is_integer is false for NaN and the infinities, which int rejects
        return int(x) if x.is_integer() else x

    if not isinstance(x, str):
        return x

    # strings are tried as JSON first, then as numbers: failing all of
    # those, they're kept as given
    for parse in (loads, int, float):
        try:
            return parse(x)
        except Exception:
            continue

    return x


autocast = autocasted = mold = molded = recast = recasted = remold = retype
remolded = retyped = retype


def typeof(x):
    'Name the JSON type of a value, using `other` for non-JSON types.'

    # the lookup uses the exact type, so subclasses count as `other`
    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')


jstype = typeof


def result_needs_fixing(x):
    '''
    Check if a value needs func fix_result before being JSON-encoded.

    That's the case for NaN, and for anything other than null, booleans,
    numbers, strings, and dicts/lists/tuples made only out of those.
    '''

    if isinstance(x, float):
        # the output is encoded with allow_nan=False, so NaN needs replacing
        return isnan(x)
    if x is None or isinstance(x, (bool, int, float, str)):
        return False
    rec = result_needs_fixing
    if isinstance(x, dict):
        return any(rec(k) or rec(v) for k, v in x.items())
    if isinstance(x, (list, tuple)):
        return any(rec(e) for e in x)
    return True


def fix_result(x, default):
    '''
    Recursively turn a value into something which can be JSON-encoded.

    The second argument is the original input: funcs are called with it, and
    `type` itself results in the name of the input's type. NaN turns into
    None, dot-accessible objects into dicts, other iterables into tuples, and
    Skip values are left out of containers. Exception values are raised, and
    anything else is turned into a string.
    '''

    if x is type:
        return type(default).__name__

    # if expression results in a func, auto-call it with the original data
    if callable(x):
        x = x(default)

    if isinstance(x, float) and isnan(x):
        return None

    if x is None or isinstance(x, (bool, int, float, str)):
        return x

    rec = fix_result

    # Dottable values have an __iter__ method, so they must be checked for
    # before the general iterable case, which would only keep their keys
    if isinstance(x, Dottable):
        return rec(x.__dict__, default)

    if isinstance(x, dict):
        return {
            rec(k, default): rec(v, default) for k, v in x.items() if not
            (isinstance(k, Skip) or isinstance(v, Skip))
        }

    if isinstance(x, Iterable):
        return tuple(rec(e, default) for e in x if not isinstance(e, Skip))

    if isinstance(x, Exception):
        raise x

    return None if isinstance(x, Skip) else str(x)


def fail(msg, code = 1):
    'Show an error message in red on stderr, then quit with the code given.'

    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)


def message(msg, result = None):
    'Show a message on stderr, then return the second argument as given.'

    print(msg, file=stderr)
    return result


msg = message


def seemsurl(path):
    'Check if a path starts with any of the URI protocols supported.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


def matchkey(kv, key):
    '''
    Find the dictionary key which best matches the string given.

    Exact matches are tried first, then case-insensitive ones, then the
    string is tried as the (possibly negative) position of a key; when all
    of those fail, the result is the string as given.
    '''

    if key in kv:
        return key

    low = key.lower()
    for k in kv:
        if low == k.lower():
            return k

    try:
        i = int(key)
    except Exception:
        return key

    n = len(kv)
    if i < 0:
        i += n
    if not (0 <= i < n):
        return key
    return next(islice(kv, i, None))


def zoom(data, keys):
    '''
    Follow a sequence of string keys/indices into nested data.

    Dictionary keys are matched using func matchkey, with missing keys giving
    None; lists/tuples are indexed with integers, with invalid/out-of-range
    indices giving None. On lists/tuples, 3 special keys end the sequence:
    `+` picks all keys after it from each dictionary item, `-` drops all keys
    after it from each dictionary item, and `.` zooms each item using all
    the keys after it.

    Raises an exception when keys are left, but the current value isn't a
    dictionary, a list, or a tuple.
    '''

    for i, k in enumerate(keys):
        if isinstance(data, dict):
            data = data.get(matchkey(data, k), None)
            continue

        if isinstance(data, (list, tuple)):
            if k == '+':
                wanted = keys[i + 1:]
                return [{k: e.get(k, None) for k in wanted}
                        for e in data if isinstance(e, dict)]
            if k == '-':
                avoid = set(keys[i + 1:])
                return [{k: v for (k, v) in e.items() if not (k in avoid)}
                        for e in data if isinstance(e, dict)]
            if k == '.':
                rest = keys[i + 1:]
                return [zoom(e, rest) for e in data]

            try:
                k = int(k)
                l = len(data)
                data = data[k] if -l <= k < l else None
            except Exception:
                # non-integer keys can't index arrays
                data = None
            continue

        raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')

    return data


def make_eval_once(run):
    '''
    Wrap an evaluator func so the global name `eval` is disabled on 1st use.

    The func returned sets global `eval` to None right before running the
    expression given, so the expression itself can't use that name.
    '''

    def eval_once(expr):
        global eval
        eval = None
        return run(expr)
    return eval_once


# convenience values for expressions to use

cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''

nil = None
none = None
null = None


# names for all the command-line options

no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
dot_opts = ('--d', '-dot', '--dot', '-dots', '--dots')
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')


# the script's actions stay at module scope (inside a plain if-statement),
# since expressions are evaluated using the module's global names
if __name__ == '__main__':
    args = argv[1:]
    no_input = False
    zoom_stdin = False
    pipe_mode = False
    trace_errors = False
    dottable_input = False
    compact_output = False

    # handle leading options: the pipe and zoom options end option-handling,
    # since all arguments after those are pipeline-steps/keys
    while len(args) > 0:
        if args[0] == '--':
            args = args[1:]
            break

        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in compact_output_opts:
            compact_output = True
            args = args[1:]
            continue

        if args[0] in dot_opts:
            dottable_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    # the message is inlined, so the global `msg` func is
                    # never overwritten with a string
                    raise Exception(
                        'expected a module name or a comma-separated list '
                        'of modules'
                    )

                # imported modules become global names expressions can use
                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)

                g = None
                import_module = None
                args = args[2:]
            except Exception as e:
                fail(e, 1)

            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        if args[0] in zoom_opts:
            zoom_stdin = True
            args = args[1:]
            break

        break

    try:
        if zoom_stdin:
            # zoom mode: all remaining arguments are keys to follow
            data = load(stdin)
            data = zoom(data, args)
        else:
            expr = 'data'
            if len(args) > 0 and (not pipe_mode):
                expr = args[0]
                args = args[1:]

            if expr == '.':
                expr = 'data'
            if not pipe_mode:
                # compiling before loading data reveals syntax errors early
                expr = compile(expr, expr, mode='eval')

            if (not pipe_mode) and len(args) > 1:
                raise Exception('can\'t use more than 1 input')
            path = '-' if len(args) == 0 or pipe_mode else args[0]

            if no_input:
                data = None
            elif path == '-':
                data = load(stdin)
            elif seemsurl(path):
                from io import TextIOWrapper
                from urllib.request import urlopen
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        data = load(txt)
            else:
                with open(path, encoding='utf-8') as inp:
                    data = load(inp)

            if dottable_input:
                data = dotate(data)

            v = val = value = d = dat = data

            # disable these names as globals, before evaluating expressions
            exec = None
            open = None
            compile = None

            if pipe_mode:
                funcs = [eval(s) for s in args]
                eval = None

                # variable names `o` and `p` work like in the `pyp` tool,
                # except the pipeline steps were given as separate cmd-line
                # arguments; both are plain globals at this (module) scope
                o = p = prev = v
                for f in funcs:
                    p = f(p)
                    if callable(p):
                        p = p(prev)
                    prev = p
                v = p
            else:
                eval = make_eval_once(eval)
                v = eval(expr)

            if result_needs_fixing(v):
                v = fix_result(v, value)

        if compact_output:
            dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
        else:
            dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
        print()
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise
        else:
            fail(e, 1)