#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tjp [options...] [python expression] [file/URI...]


Transform Json with Python runs a python expression on a single JSON-encoded
input.

The expression can use either `v`, `value`, `d`, or `data` for the decoded
input. Invalid-JSON inputs result in an error, with no chance of recovery.

Input-sources can be either files or web-URIs. When not given a named input,
the standard input is used.


Examples

# numbers from 0 to 5; no input is read/used
tjp = 'range(6)'

# using bases 1 to 5, find all their powers up to the 4th
tjp = '((n**p for p in range(1, 4+1)) for n in range(1, 6))'

# keep only the last 2 items from the input
tjp = 'range(1, 6)' | tjp 'data[-2:]'

# chunk/regroup input items into arrays of up to 3 items each
tjp = 'range(1, 8)' | tjp 'chunk(data, 3)'

# ignore errors/exceptions, in favor of a fallback value
tjp = 'rescue(lambda: 2 * float("no way"), "fallback value")'

# ignore errors/exceptions, calling a fallback func with the exception
tjp = 'rescue(lambda: 2 * float("no way"), str)'

# use dot-syntax on JSON data
tjp = '{"abc": {"xyz": 123}}' | tjp -d 'data.abc.xyz'

# use dot-syntax on JSON data; keywords as properties are syntax-errors
tjp = '{"abc": {"def": 123}}' | tjp -d 'data.abc["def"]'

# func results are automatically called on the input
tjp = '{"abc": 123, "def": 456}' | tjp len
'''


from itertools import islice
from json import dump, load, loads
# keep the builtin reachable, since `compile` is redefined further down as a
# caching regex-compiler for use in expressions
compile_py = compile
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin, stdout
from typing import Iterable


# no args: show the help message on stderr; a lone help option shows it
# on stdout instead
if len(argv) < 2:
    print(info.strip(), file=stderr)
    exit(0)
if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info.strip())
    exit(0)


class Skip:
    'Marker type for values which func fix_result leaves out of results.'
    pass


skip = Skip()


class Dottable:
    'Enable convenient dot-syntax access to dictionary values.'

    def __getattr__(self, key):
        # only called when normal attribute lookup fails: missing keys
        # result in None, instead of raising an AttributeError
        return self.__dict__.get(key)

    def __getitem__(self, key):
        # missing keys result in None, instead of raising a KeyError
        return self.__dict__.get(key)

    def __iter__(self):
        # iterating gives the keys, the same as dictionaries do
        return iter(self.__dict__)


def dotate(x):
    '''
    Recursively ensure all dictionaries in a value are dot-accessible.

    Dictionaries turn into Dottable values, lists and tuples are rebuilt
    with their items converted, and any other value is returned as given.
    '''

    if isinstance(x, dict):
        d = Dottable()
        d.__dict__ = {k: dotate(v) for k, v in x.items()}
        return d
    if isinstance(x, list):
        return [dotate(e) for e in x]
    if isinstance(x, tuple):
        return tuple(dotate(e) for e in x)
    return x

dotated = dotate
dote = dotate
doted = dotate
dotified = dotate
dotify = dotate
dottified = dotate
dottify = dotate


def chunk(items, chunk_size):
    '''
    Break iterable into chunks, each with up to the item-count given.

    Strings are split into substrings, while any other iterable is split
    into tuples: the last chunk may have fewer items than all others. Since
    this is a generator, invalid chunk-sizes raise an Exception only when
    the result is first iterated.
    '''

    # check the chunk-size before handling strings as well, since slicing
    # a string using a non-positive chunk-size would otherwise never end
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk


# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# ire_cache is like re_cache, except it's for case-insensitive regexes
ire_cache = {}


def compile(expr, flags = 0):
    '''
    Speed-up using regexes across lines, by avoiding recompilations.

    The only flags supported are 0 (the default) and IGNORECASE: any other
    value raises an Exception. Results are remembered in re_cache, or in
    ire_cache when case-insensitive.
    '''

    if flags != 0 and flags != IGNORECASE:
        msg = 'only the default and case-insensitive options are supported'
        raise Exception(msg)

    cache = re_cache if flags == 0 else ire_cache
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat


def icompile(expr):
    'Case-insensitive variant of func compile, which also caches results.'
    return compile(expr, IGNORECASE)


def cond(*args):
    '''
    Pick the value paired to the first truthy condition.

    Arguments are condition/value pairs, optionally followed by a final
    fallback value: the result is None when no condition holds and there's
    no fallback, or when no arguments are given at all.
    '''

    if len(args) == 0:
        return None

    for i, e in enumerate(args):
        # conditions are at even indices, each followed by its value
        if i % 2 == 0 and i < len(args) - 1 and e:
            return args[i + 1]

    # an odd arg-count means the last argument is the fallback
    return args[-1] if len(args) % 2 == 1 else None


def dive(into, using):
    '''
    Depth-first recursive caller for 1-input functions.

    The function is called on all non-container values found: dictionaries
    keep their keys, while other non-string iterables turn into lists. The
    2 arguments can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)


def divekeys(into, using):
    '''
    Depth-first recursive caller for 1-input funcs which rename dict keys.

    The function is called on each key of all dictionaries found, its
    results being the new keys: non-container values are kept as given,
    while non-string iterables turn into lists. The 2 arguments can be
    given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    # func rec only takes the value to transform
    return rec(into)


def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions.

    With 1 function, it's called with the key/index and the value of all
    non-container values found, the top-level value having None as its key.
    With 2 functions, the first one is called to rename dictionary keys,
    using the string form of its results, while the second one transforms
    non-container values. The data can be either the first argument, or
    the last one given.
    '''

    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {k: rec(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(i, e) for i, e in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(k, e)): rec2(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(i, e) for i, e in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv


def drop(src, *what):
    '''
    Remove things from a value.

    Strings lose all occurrences of the substrings given, dictionaries lose
    the keys given, and other iterables are handled as sequences of
    dictionaries, resulting in a list. Any other value results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for k, v in src.items() if k not in what}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src]

    return None

dropped = drop


def join(x, y = ' '):
    '''
    Join values into a string, or make a dict from keys and values.

    When either argument is a string, it's the separator used to join the
    string forms of the items in the other argument; otherwise the result
    is a dictionary pairing items from the first argument as keys, with
    items from the second one as values.
    '''

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))


def pick(src, *keys):
    '''
    Keep only the keys given from a dictionary, or from each dictionary in
    a sequence: missing keys are given None values.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [{k: e.get(k, None) for k in keys} for e in src]

picked = pick


def rescue(attempt, fallback = None):
    '''
    Call the zero-input function given, using the fallback when it fails.

    Fallback functions are called with the exception caught, the result
    being what they return; any other fallback is the result as given.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
catched = rescue
caught = rescue
recover = rescue
recovered = rescue
rescued = rescue


def retype(x):
    '''
    Try to narrow the type of the value given.

    Floats with integer values turn into ints; strings are tried as JSON
    first, then as ints, then as floats; anything else, including strings
    where all those attempts fail, is returned as given.
    '''

    if isinstance(x, float):
        # is_integer is False for infinities and NaN, which func int would
        # reject by raising an exception
        return int(x) if x.is_integer() else x

    if not isinstance(x, str):
        return x

    # failed attempts are deliberately ignored, to try the next conversion
    try:
        return loads(x)
    except Exception:
        pass

    try:
        return int(x)
    except Exception:
        pass

    try:
        return float(x)
    except Exception:
        pass

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype


def typeof(x):
    '''
    Name the type of a value, using JSON/JavaScript-style type names: the
    result is 'other' for values of types with no JSON counterpart.
    '''

    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof


def result_needs_fixing(x):
    '''
    Check if a value has anything in it, no matter how deeply nested, which
    isn't None, a bool, a number, a string, a dict, a list, or a tuple.
    '''

    if x is None or isinstance(x, (bool, int, float, str)):
        return False
    rec = result_needs_fixing
    if isinstance(x, dict):
        return any(rec(k) or rec(v) for k, v in x.items())
    if isinstance(x, (list, tuple)):
        return any(rec(e) for e in x)
    return True


def fix_result(x, default):
    '''
    Recursively turn an expression's result into a JSON-compatible value.

    The builtin `type` turns into the type-name of the original data;
    functions are called with the original data; Skip values are left out
    of containers, and result in None elsewhere; dot-accessible values turn
    back into dictionaries; other iterables turn into tuples; exceptions
    are raised; anything else turns into its string form.
    '''

    if x is type:
        return type(default).__name__

    # if expression results in a func, auto-call it with the original data
    if callable(x):
        x = x(default)

    if x is None or isinstance(x, (bool, int, float, str)):
        return x

    rec = fix_result

    if isinstance(x, dict):
        return {
            rec(k, default): rec(v, default) for k, v in x.items() if not
            (isinstance(k, Skip) or isinstance(v, Skip))
        }

    # this check must come before the one for iterables, since class
    # Dottable defines __iter__, and would otherwise result in just its keys
    if isinstance(x, Dottable):
        return rec(x.__dict__, default)

    if isinstance(x, Iterable):
        return tuple(rec(e, default) for e in x if not isinstance(e, Skip))

    if isinstance(x, Exception):
        raise x

    return None if isinstance(x, Skip) else str(x)


def fail(msg, code = 1):
    'Show the message given on stderr, styled in red, then quit the app.'
    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)


def message(msg, result = None):
    'Show a message on stderr, returning the (optional) second argument.'
    print(msg, file=stderr)
    return result

msg = message


def seemsurl(path):
    'Check if a string starts with any of the URI protocols supported.'
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


def matchkey(kv, key):
    '''
    Find which key of a dictionary best matches the name given.

    Exact matches are tried first, then case-insensitive ones, then the
    name is tried as an integer position among the dictionary's keys, where
    negative values count backward from the end. The name is returned as
    given when nothing matches.
    '''

    if key in kv:
        return key

    low = key.lower()
    for k in kv.keys():
        if low == k.lower():
            return k

    try:
        i = int(key)
        l = len(kv)
        if i < 0:
            i += l

        if not (-l <= i < l):
            return key

        for j, k in enumerate(kv.keys()):
            if i == j:
                return k
    except Exception as _:
        # names which aren't valid integers can't be positions
        return key

    return key


def zoom(data, keys):
    '''
    Follow a sequence of keys/indices into nested data.

    Dictionary keys are matched using func matchkey, missing ones resulting
    in None; lists and tuples use integer indices, invalid or out-of-range
    ones resulting in None. Trying to go into any other type of value
    raises an Exception.
    '''

    for k in keys:
        if isinstance(data, dict):
            data = data.get(matchkey(data, k), None)
            continue

        if isinstance(data, (list, tuple)):
            try:
                k = int(k)
                l = len(data)
                data = data[k] if -l <= k < l else None
            except Exception as _:
                # non-integer indices can't match any array item
                data = None
            continue

        raise Exception(f'{k}: can\'t zoom on value of type {typeof(data)}')

    return data


def make_eval_once(run):
    '''
    Wrap an evaluator func so that the global name `eval` is disabled (set
    to None) right before running it, which keeps the expression evaluated
    from calling `eval` itself.
    '''

    def eval_once(expr):
        global eval
        eval = None
        return run(expr)
    return eval_once

eval = make_eval_once(eval)


# convenience names available to expressions

cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''

nil = None
none = None
null = None


# all the spellings supported for each of the command-line options

no_input_opts = (
    '=', '-n', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
compact_output_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)
dot_opts = ('-d', '--d', '-dot', '--dot', '-dots', '--dots')
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
more_modules_opts = ('-mm', '--mm', '-more', '--more')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')
zoom_opts = ('-z', '--z', '-zj', '--zj', '-zoom', '--zoom')

args = argv[1:]
no_input = False
zoom_stdin = False
trace_errors = False
dottable_input = False
compact_output = False

# handle all leading options, stopping at the first non-option argument
while len(args) > 0:
    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in compact_output_opts:
        compact_output = True
        args = args[1:]
        continue

    if args[0] in dot_opts:
        dottable_input = True
        args = args[1:]
        continue

    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # make the modules named available to the expression as globals
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # hide the import machinery from the expression
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            fail(e, 1)

        continue

    if args[0] in more_modules_opts:
        import functools, itertools, json, math, random, statistics, string, time
        args = args[1:]
        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    if args[0] in zoom_opts:
        # all arguments after a zoom option are keys to follow
        zoom_stdin = True
        args = args[1:]
        break

    break


try:
    expr = 'data'
    # zoom mode never evaluates an expression, since all its remaining
    # arguments are keys: none of them must be taken as the expression
    if len(args) > 0 and not zoom_stdin:
        expr = args[0]
        args = args[1:]

    if expr == '.':
        expr = 'data'
    expr = compile_py(expr, expr, mode='eval')

    # zoom mode can follow any number of keys, and only reads from stdin
    if len(args) > 1 and not zoom_stdin:
        raise Exception('can\'t use more than 1 input')
    path = '-' if len(args) == 0 else args[0]

    if no_input:
        data = None
    elif zoom_stdin:
        data = load(stdin)
        data = zoom(data, args)
    elif path == '-':
        data = load(stdin)
    elif seemsurl(path):
        from io import TextIOWrapper
        from urllib.request import urlopen
        with urlopen(path) as inp:
            with TextIOWrapper(inp, encoding='utf-8') as txt:
                data = load(txt)
    else:
        with open(path, encoding='utf-8') as inp:
            data = load(inp)

    if (not zoom_stdin) and dottable_input:
        data = dotate(data)

    v = value = d = data

    if not zoom_stdin:
        # hide some of the riskier builtins from the expression by name
        compile_py = None
        exec = None
        open = None
        # NOTE: the expression comes straight from the command line, and is
        # evaluated as arbitrary python code, which is this tool's purpose:
        # never run this tool using expressions from untrusted sources
        v = eval(expr)
    if result_needs_fixing(v):
        v = fix_result(v, data)

    if compact_output:
        dump(v, stdout, indent=None, separators=(',', ':'), allow_nan=False)
    else:
        dump(v, stdout, indent=2, separators=(',', ': '), allow_nan=False)
    print()
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
    exit(0)
except KeyboardInterrupt:
    exit(2)
except Exception as e:
    if trace_errors:
        raise
    else:
        fail(e, 1)