#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from inspect import getfullargspec
from itertools import islice
from json import load, dump, dumps
from re import compile as compile_re
from sys import argv, exit, stderr, stdin, stdout
from typing import Any, Callable, Dict, Iterable, Optional, Tuple


info_msg = '''
zj [keys/indices...]


Zoom Json digs into a subset of valid JSON input, using the given mix of
keys and array-indices, the latter being either 0-based or negative, to
index backward from the ends of arrays.

Zooming on object keys is first tried as an exact key-match, failing that
as a case-insensitive key-match (first such match): when both approaches
fail, if the key is a valid integer, the key at the (even negative) index
given is used.

Invalid array-indices and missing object-keys result in null values, when
none of the special keys/fallbacks shown later apply.

You can slice arrays the exclusive/go/python way using index-pairs with a
`:` between the start/end pair, as long as it's a single argument; you can
even use `..` as the index-pair separator to include the stop index in the
result. Either way, as with go/python, you can omit either of the indices
when slicing.

Special key `.` acts as implicit loops on arrays, and even objects without
that specific key: in the unlikely case that an object has `.` as one of
its keys, you can use one of loop-fallback aliases, shown later.

Another special key is `+` (no quotes): when used, the rest of the keys
are used `in parallel`, allowing multiple picks from the current value.

Similar to `+`, the `-` fallback-key drops keys, which means all items are
picked, except for those mentioned after the `-`.

In case any of the special keys are actual keys in the data loaded, some
aliases are available:

    .   /.   ./   :.   .:
    +   /+   +/   :+   +:
    -   /-   -/   :-   -:

    .i   :i   .info   :info   :info:
    .k   :k   .keys   :keys   :keys:
    .l   :l   .len   .length   :len   :len:   :length   :length:
    .tally   :tally   :tally:
    .t   :t   .type   :type   :type:
    .u   :u   .unique   :unique   :unique:

These aliases allow using the special functionality even on objects whose
keys match some of these special names, as it's extremely unlikely data use
all aliases as actual keys at any level.

The only input supported is valid JSON coming from standard-input: there's
no way to load files using their names. To load data from files/URIs use
tools like `cat` or `curl`, and pipe their output into this tool.
'''

# matches slice arguments such as `1:3`, `:-1`, `2..` or `..`; groups 1 and 4
# are the (possibly empty) start/stop texts, group 3 is the separator used
slice_re = compile_re('''^(([+-]?[0-9]+)?)(:|\\.\\.)(([+-]?[0-9]+)?)$''')


def zoom(data: Any, keys: Tuple[str, ...]) -> Any:
    '''
    Dig into `data` by applying each of the `keys` in order.

    Each key is tried, in order, as: an object key (exact, case-insensitive,
    then positional), an array index or slice, a python-expression marker,
    a loop key, a pick/drop key, and finally a single-value fallback such
    as `.len` or `.type`. Keys which match nothing turn objects and arrays
    into None, while simple values raise an error.

    Any failure is re-raised as an Exception whose message starts with the
    path of keys followed up to the failing one.
    '''

    eval_due = False

    for i, k in enumerate(keys):
        try:
            if eval_due:
                # SECURITY: this evaluates command-line text as python code;
                # that's the whole point of the `pyf` option, but it also
                # means arguments must never come from untrusted sources
                data = eval(k)(data)
                eval_due = False
                continue

            if isinstance(data, dict):
                found = match_key(data, k)
                if found in data:
                    data = data[found]
                    continue
                if slice_re.match(k):
                    data = {name: data[name] for name in match_keys(data, k)}
                    continue

            if isinstance(data, (list, tuple)):
                # a separate name keeps loop-counter `i` intact, since it's
                # needed later to find the rest of the keys and error paths
                try:
                    index = int(k)
                except ValueError:
                    index = None

                if index is not None:
                    size = len(data)
                    data = data[index] if -size <= index < size else None
                    continue
                if slice_re.match(k):
                    data = [data[j] for j in match_indices(data, k)]
                    continue

            if k in pyf:
                # the next key is a python expression to call on the data
                eval_due = True
                continue

            if k in ('.', '/.', './', ':.', '.:'):
                if isinstance(data, dict):
                    rest = tuple(keys[i + 1:])
                    return {k: zoom(v, rest) for k, v in data.items()}
                if isinstance(data, (list, tuple)):
                    rest = tuple(keys[i + 1:])
                    return tuple(zoom(v, rest) for v in data)

                # doing nothing amounts to an identity-op for simple values
                continue

            fn = final_fallbacks.get(k, None)
            if fn:
                # pick/drop handlers use up all remaining keys
                return fn(data, tuple(keys[i + 1:]))

            fn = fallbacks.get(k, None)
            if fn:
                data = fn(data)
                continue

            if isinstance(data, (dict, list, tuple)):
                # missing keys and non-matching indices give null values
                data = None
                continue

            kind = typeof(data)
            msg = f'value of type {kind} has no properties to zoom into'
            raise Exception(msg)
        except Exception as e:
            key_path = ' > '.join(islice(keys, None, i + 1))
            raise Exception(f'{key_path}: {e}') from e

    return data


def match_key(src: Dict, key: str) -> Optional[str]:
    '''
    Find the key from object `src` which best matches the text given.

    Exact matches are tried first, then case-insensitive ones; failing
    those, integer-like text is used as a (possibly negative) position
    among the object's keys. Out-of-range positions result in None, while
    texts matching nothing are given back unchanged.
    '''

    if key in src:
        return key

    low = key.casefold()
    for k in src:
        if low == k.casefold():
            return k

    try:
        i = int(key)
    except ValueError:
        return key

    size = len(src)
    if i < 0:
        i += size
    if i < 0 or i >= size:
        return None
    return next(islice(src, i, None))


def match_keys(src: Any, key: str) -> Iterable:
    '''
    Yield all keys/indices from `src` matching the key or slice given.

    NOTE(review): when nothing else matches, results from single-value
    fallbacks such as `.len` are yielded too, and those aren't valid
    keys/indices for `src` — callers indexing with them will fail.
    '''

    if isinstance(src, (list, tuple)):
        yield from match_indices(src, key)
        yield from match_fallbacks(src, key)
        return

    if isinstance(src, dict):
        if key in src:
            yield key
            return

        low = key.casefold()
        for k in src:
            if low == k.casefold():
                # only the first case-insensitive match counts
                yield k
                return

        yield from match_indices(src, key)
        yield from match_fallbacks(src, key)
        return

    yield from match_fallbacks(src, key)


def match_indices(src: Any, key: str) -> Iterable:
    '''
    Yield what an integer or slice argument selects from `src`.

    Arrays yield non-negative indices, while objects yield the keys found
    at the positions selected. Out-of-range integers, empty slices, and
    texts which are neither integers nor slices yield nothing.
    '''

    try:
        i = int(key)
    except ValueError:
        i = None

    if i is not None:
        if isinstance(src, (list, tuple)):
            size = len(src)
            if -size <= i < size:
                # yield the index itself, never the item it refers to, to
                # stay consistent with what slices yield
                yield i + size if i < 0 else i
            return

        if isinstance(src, dict):
            size = len(src)
            if i < 0:
                i += size
            if 0 <= i < size:
                yield next(islice(src, i, None))
            return

        return

    m = slice_re.match(key)
    if not m:
        return

    size = len(src)

    (start, _, kind, stop, _) = m.groups()
    start = int(start) if start != '' else 0
    stop = int(stop) if stop != '' else size

    if start < 0:
        start += size
    start = max(start, 0)
    if stop < 0:
        stop += size
    # always clamp the stop, so slices like `0:100` never go past the end
    stop = min(stop, size)
    if kind == '..':
        # the `..` separator includes the stop index in the result
        stop = min(stop + 1, size)

    if start >= stop:
        return

    if isinstance(src, dict):
        yield from islice(src, start, stop)
        return

    if isinstance(src, (list, tuple)):
        yield from range(start, stop)
        return


def match_fallbacks(src: Any, key: str) -> Iterable:
    'Yield the result of the single-value fallback named by key, if any.'

    fn = fallbacks.get(key, None)
    if fn:
        yield fn(src)


def keys(src: Any) -> Any:
    'Get all keys from objects, all indices from arrays, or None otherwise.'

    if isinstance(src, dict):
        return tuple(src.keys())
    if isinstance(src, (list, tuple)):
        return tuple(range(len(src)))
    return None


def info(x: Any) -> str:
    'Describe a value using its JSON type, and item-count for containers.'

    if isinstance(x, dict):
        return f'object ({len(x)} items)'
    if isinstance(x, (list, tuple)):
        return f'array ({len(x)} items)'
    return typeof(x)


def tally(x: Any) -> Any:
    '''
    Count how many times each item appears in an array.

    The result's keys are the compact-JSON forms of the items, sorted by
    descending count, with ties kept in order of first appearance. Values
    which aren't arrays result in None.
    '''

    if not isinstance(x, (list, tuple)):
        return None

    counts = {}
    for v in x:
        s = dumps(v, indent=None, allow_nan=False, check_circular=False)
        counts[s] = counts.get(s, 0) + 1
    # sorting is stable, even when reversed, so ties keep their order
    ranked = sorted(counts, key=lambda k: counts[k], reverse=True)
    return {k: counts[k] for k in ranked}


def typeof(x: Any) -> str:
    'Get the JSON type-name for a value, or `other` for non-JSON types.'

    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')


def unique(x: Any) -> Any:
    '''
    Keep only the first appearance of each item in an array.

    Items are compared using their compact-JSON forms. Values which aren't
    arrays are given back unchanged.
    '''

    if not isinstance(x, (list, tuple)):
        return x

    got = set()
    kept = []
    for v in x:
        s = dumps(v, indent=None, allow_nan=False, check_circular=False)
        if s in got:
            continue
        kept.append(v)
        got.add(s)
    return kept


# fallbacks has all single-value funcs used when special keys match nothing
fallbacks: Dict[str, Callable] = {
    '.i': info,
    '.info': info,
    ':i': info,
    ':info': info,
    ':info:': info,
    '.k': keys,
    ':k': keys,
    '.keys': keys,
    ':keys': keys,
    ':keys:': keys,
    '.kind': typeof,
    ':kind': typeof,
    ':kind:': typeof,
    '.l': len,
    '.len': len,
    '.length': len,
    ':l': len,
    ':len': len,
    ':length': len,
    ':len:': len,
    ':length:': len,
    '.tally': tally,
    ':tally': tally,
    ':tally:': tally,
    '.t': typeof,
    '.type': typeof,
    ':t': typeof,
    ':type': typeof,
    ':type:': typeof,
    '.u': unique,
    '.unique': unique,
    ':u': unique,
    ':unique': unique,
    ':u:': unique,
    ':unique:': unique,
}


def pick(src: Any, keys: Tuple[str, ...]) -> Any:
    '''
    Keep only the object entries matching any of the keys given.

    Arrays are handled item by item, ignoring all items which aren't
    objects. Any other type of value raises an Exception.
    '''

    if isinstance(src, dict):
        picked = {}
        for k in keys:
            for name in match_keys(src, k):
                picked[name] = src[name]
        return picked

    if isinstance(src, (list, tuple)):
        return tuple(pick(e, keys) for e in src if isinstance(e, dict))

    msg = f'can\'t pick properties from value of type {typeof(src)}'
    raise Exception(msg)


def drop(src: Any, keys: Tuple[str, ...]) -> Any:
    '''
    Keep all object entries, except those matching any of the keys given.

    Arrays are handled item by item, ignoring all items which aren't
    objects. Any other type of value raises an Exception.
    '''

    if isinstance(src, dict):
        avoid = set()
        for k in keys:
            avoid.update(match_keys(src, k))
        return {k: v for k, v in src.items() if k not in avoid}

    if isinstance(src, (list, tuple)):
        return tuple(drop(e, keys) for e in src if isinstance(e, dict))

    msg = f'can\'t drop properties from value of type {typeof(src)}'
    raise Exception(msg)


# final_fallbacks has the funcs which use up all keys after their special key
final_fallbacks: Dict[str, Callable] = {
    '+': pick,
    ':+:': pick,
    ':+': pick,
    '+:': pick,
    '/+': pick,
    '+/': pick,

    '-': drop,
    ':-:': drop,
    ':-': drop,
    '-:': drop,
    '/-': drop,
    '-/': drop,
}


# extra imports for the `python-lambda` option

pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')

if any(s in pyf for s in argv):
    import decimal
    import fractions
    import json
    import functools
    import itertools
    import math
    Math = math
    from math import \
        acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
        copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
        fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
        isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
        log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
        radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
    try:
        # these funcs are only available on newer versions of python
        from math import cbrt, exp2
    except ImportError:
        pass
    power = pow
    import operator
    import statistics
    import string
    import textwrap
    import time
    import urllib.parse


# handy names for symbols which are awkward to type/quote on command-lines,
# for use in expressions given via the `python-lambda` option

apo = '\''
apos = '\''
backquote = '`'
backtick = '`'
ball = '●'
block = '█'
btick = '`'
bullet = '•'
cdot = '·'
circle = '●'
cross = '×'
dquo = '"'
dquote = '"'
emdash = '—'
endash = '–'
ge = '≥'
geq = '≥'
hellip = '…'
hole = '○'
lcurly = '{'
ldquo = '“'
ldquote = '“'
le = '≤'
leq = '≤'
mdash = '—'
mdot = '·'
miniball = '•'
ndash = '–'
neq = '≠'
rcurly = '}'
rdquo = '”'
rdquote = '”'
sball = '•'
square = '■'
squo = '\''
squote = '\''


def dive(into: Any, doing: Callable) -> Any:
    'Transform a nested value by calling a func via depth-first recursion.'

    # support args in either order
    if callable(into):
        into, doing = doing, into

    return _dive_kv(None, into, doing)

deepmap = dive1 = dive


def divebin(x: Any, y: Any, doing: Callable) -> Any:
    'Nested 2-value version of depth-first-recursive func dive.'

    # support args in either order
    if callable(x):
        x, y, doing = y, doing, x

    narg = required_arg_count(doing)
    if narg == 2:
        return dive(x, lambda a: dive(y, lambda b: doing(a, b)))
    if narg == 4:
        return dive(x, lambda i, a: dive(y, lambda j, b: doing(i, a, j, b)))
    raise Exception('divebin(...) only supports funcs with 2 or 4 args')

bindive = dive2 = divebin


def _dive_kv(key: Any, into: Any, doing: Callable) -> Any:
    '''
    Recursive helper for func dive: objects and non-string iterables are
    rebuilt as dicts/lists, while all other values are given to the func,
    along with their key/index when the func takes 2 required args.
    '''

    if isinstance(into, dict):
        return {k: _dive_kv(k, v, doing) for k, v in into.items()}
    if isinstance(into, Iterable) and not isinstance(into, str):
        return [_dive_kv(i, e, doing) for i, e in enumerate(into)]

    narg = required_arg_count(doing)
    return doing(key, into) if narg == 2 else doing(into)


def rescue(attempt: Callable, fallback: Any = None) -> Any:
    '''
    Call a zero-arg func, using the fallback when it fails: callable
    fallbacks are called with the exception, to get the final result.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = recover = recovered = rescued = rescue


def required_arg_count(f: Callable) -> int:
    'Count the positional args without defaults; types count as having 1.'

    if isinstance(f, type):
        return 1

    meta = getfullargspec(f)
    n = len(meta.args)
    if meta.defaults:
        n -= len(meta.defaults)
    return n


# deny file-access to expression-evaluators
# NOTE(review): this only hides the name at module level: the builtin is
# still reachable in other ways, so this isn't a real sandbox
open = None


if __name__ == '__main__':
    # the names below are deliberately kept as module-level globals, so
    # evaluated expressions see the same names as before this guard existed

    if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info_msg.strip(), file=stderr)
        exit(0)

    compact_opts = (
        '-c', '--c', '-compact', '--compact',
        '-j0', '--j0', '-json0', '--json0',
    )

    args = argv[1:]
    compact = False

    # handle leading options, stopping at the first non-option or at `--`
    while len(args) > 0:
        if args[0] == '--':
            args = args[1:]
            break

        if args[0] in compact_opts:
            compact = True
            args = args[1:]
            continue

        break

    try:
        data = load(stdin.buffer)
        data = zoom(data, tuple(args))
        v, w = data, stdout
        i = None if compact else 2
        s = (',', ':') if compact else (',', ': ')
        dump(
            v, w, indent=i, separators=s,
            allow_nan=False, check_circular=False,
        )
        stdout.write('\n')
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)