File: zj.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from inspect import getfullargspec
  27 from itertools import islice
  28 from json import load, dump, dumps
  29 from sys import argv, stderr, stdin, stdout
  30 from typing import Any, Callable, Dict, Iterable, NoReturn, Tuple
  31 
  32 
  33 from re import compile as compile_re
  34 
  35 
# help message for this tool: show_help prints it on standard-error, after
# stripping its leading/trailing whitespace
info_msg = '''
zj [keys/indices...]


Zoom Json digs into a subset of valid JSON input, using the given mix of
keys and array-indices, the latter being either 0-based or negative, to
index backward from the ends of arrays.

Zooming on object keys is first tried as an exact key-match, failing that
as a case-insensitive key-match (first such match): when both approaches
fail, if the key is a valid integer, the key at the (even negative) index
given is used.

Invalid array-indices and missing object-keys result in null values, when
none of the special keys/fallbacks shown later apply.

You can slice arrays the exclusive/go/python way using index-pairs with a
`:` between the start/end pair, as long as it's a single argument; you can
even use `..` as the index-pair separator to include the stop index in the
result. Either way, as with go/python, you can omit either of the indices
when slicing.

Special key `.` acts as implicit loops on arrays, and even objects without
that specific key: in the unlikely case that an object has `.` as one of
its keys, you can use one of loop-fallback aliases, shown later.

Another special key is `+` (no quotes): when used, the rest of the keys
are used `in parallel`, allowing multiple picks from the current value.
When picking array items, you can also use either type (`:` or `..`) of
slicing, even mixing it with individual indices.

Similar to `+`, the `-` fallback-key drops keys, which means all items are
picked, except for those mentioned after the `-`.

Unlike the looping special key, after the first `+` special-key, all keys
following it, special or not, are picked normally.

In case any of the special keys are actual keys in the data loaded, some
aliases are available:

    .   /.  ./  :.  .:
    +   /+  +/  :+  +:
    -   /-  -/  :-  -:

    .i   :i   .info    :info     :info:
    .k   :k   .keys    :keys     :keys:
    .l   :l   .len     .length   :len      :len:    :length    :length:
              .tally   :tally    :tally:
    .t   :t   .type    :type     :type:
    .u   :u   .unique  :unique   :unique:

These aliases allow using the special functionality even on objects whose
keys match some of these special names, as it's extremely unlikely data use
all aliases as actual keys at any level.

The only input supported is valid JSON coming from standard-input: there's
no way to load files using their names. To load data from files/URIs use
tools like `cat` or `curl`, and pipe their output into this tool.
'''
  95 
  96 slice_re = compile_re('''^(([+-]?[0-9]+)?)(:|\.\.)(([+-]?[0-9]+)?)$''')
  97 
  98 
  99 def zoom(data: Any, keys: Tuple[str, ...]) -> Any:
 100     eval_due = False
 101     pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')
 102 
 103     for i, k in enumerate(keys):
 104         try:
 105             if eval_due:
 106                 data = eval(k)(data)
 107                 eval_due = False
 108                 continue
 109 
 110             if isinstance(data, dict):
 111                 m = match_key(data, k)
 112                 if m in data:
 113                     data = data[m]
 114                     continue
 115                 m = slice_re.match(k)
 116                 if m:
 117                     data = {k: data[k] for k in match_keys(data, k)}
 118                     continue
 119 
 120             if isinstance(data, (list, tuple)):
 121                 try:
 122                     i = int(k)
 123                     l = len(data)
 124                     data = data[i] if -l <= i < l else None
 125                     continue
 126                 except Exception:
 127                     m = slice_re.match(k)
 128                     if m:
 129                         data = [data[i] for i in match_indices(data, k)]
 130                         continue
 131 
 132             if k in pyf:
 133                 eval_due = True
 134                 continue
 135 
 136             if k in ('.', '/.', './', ':.', '.:'):
 137                 if isinstance(data, dict):
 138                     rest = tuple(keys[i + 1:])
 139                     return {k: zoom(v, rest) for k, v in data.items()}
 140                 if isinstance(data, (list, tuple)):
 141                     rest = tuple(keys[i + 1:])
 142                     return tuple(zoom(v, rest) for v in data)
 143 
 144                 # doing nothing amounts to an identity-op for simple values
 145                 continue
 146 
 147             fn = final_fallbacks.get(k, None)
 148             if fn:
 149                 return fn(data, tuple(keys[i + 1:]))
 150 
 151             fn = fallbacks.get(k, None)
 152             if fn:
 153                 data = fn(data)
 154                 continue
 155 
 156             if isinstance(data, (dict, list, tuple)):
 157                 data = None
 158                 continue
 159 
 160             kind = typeof(data)
 161             msg = f'value of type {kind} has no properties to zoom into'
 162             raise Exception(msg)
 163         except Exception as e:
 164             key_path = ' > '.join(islice(keys, None, i + 1))
 165             raise Exception(f'{key_path}: {e}')
 166 
 167     return data
 168 
 169 
 170 def match_key(src: Dict, key: str) -> str:
 171     if key in src:
 172         return key
 173 
 174     low = key.casefold()
 175     for k in src.keys():
 176         if low == k.casefold():
 177             return k
 178 
 179     try:
 180         i = int(key)
 181         l = len(src)
 182         if i < 0:
 183             i += l
 184         if i < 0 or i >= l:
 185             return None
 186         for j, k in enumerate(src.keys()):
 187             if i == j:
 188                 return k
 189     except Exception:
 190         return key
 191     return key
 192 
 193 
 194 def match_keys(src: Any, key: str) -> Iterable:
 195     if isinstance(src, (list, tuple)):
 196         yield from match_indices(src, key)
 197         yield from match_fallbacks(src, key)
 198         return
 199 
 200     if isinstance(src, dict):
 201         if key in src:
 202             yield key
 203             return
 204 
 205         low = key.casefold()
 206         for k in src.keys():
 207             if low == k.casefold():
 208                 yield k
 209                 return
 210 
 211         yield from match_indices(src, key)
 212         yield from match_fallbacks(src, key)
 213         return
 214 
 215     yield from match_fallbacks(src, key)
 216 
 217 
 218 def match_indices(src: Any, key: str) -> Iterable:
 219     try:
 220         i = int(key)
 221 
 222         if isinstance(src, (list, tuple)):
 223             l = len(src)
 224             yield src[i] if -l <= i < l else None
 225             return
 226 
 227         if isinstance(src, dict):
 228             l = len(src)
 229             if i < 0:
 230                 i += l
 231             if i < 0 or i >= l:
 232                 return
 233 
 234             for j, k in enumerate(src.keys()):
 235                 if i == j:
 236                     yield k
 237                     return
 238 
 239         return
 240     except Exception:
 241         pass
 242 
 243     m = slice_re.match(key)
 244     if not m:
 245         return
 246 
 247     l = len(src)
 248 
 249     (start, _, kind, stop, _) = m.groups()
 250     start = int(start) if start != '' else 0
 251     stop = int(stop) if stop != '' else l
 252 
 253     if start < 0:
 254         start += l
 255     start = max(start, 0)
 256     if stop < 0:
 257         stop += l
 258     stop = min(stop, l)
 259     if kind == '..':
 260         stop += 1
 261     stop = min(stop, l)
 262 
 263     if start > stop:
 264         return
 265     if (start < 0 and stop < 0) or (start >= l and stop >= l):
 266         return
 267 
 268 
 269     if isinstance(src, dict):
 270         for i, k in enumerate(src.keys()):
 271             if i >= stop:
 272                 return
 273             if start <= i:
 274                 yield k
 275         return
 276 
 277     if isinstance(src, (list, tuple)):
 278         yield from range(start, stop)
 279         return
 280 
 281 
 282 
 283 def match_fallbacks(src: Any, key: str) -> Iterable:
 284     fn = fallbacks.get(key, None)
 285     if fn:
 286         yield fn(src)
 287 
 288 
 289 def show_help(*_) -> NoReturn:
 290     print(info_msg.strip(), file=stderr)
 291     exit(1)
 292 
 293 
 294 def keys(src: Any) -> Any:
 295     if isinstance(src, dict):
 296         return tuple(src.keys())
 297     if isinstance(src, (list, tuple)):
 298         return tuple(range(len(src)))
 299     return None
 300 
 301 
 302 def info(x: Any) -> str:
 303     if isinstance(x, dict):
 304         return f'object ({len(x)} items)'
 305     if isinstance(x, (list, tuple)):
 306         return f'array ({len(x)} items)'
 307     return typeof(x)
 308 
 309 
 310 def tally(x: Any) -> Any:
 311     if not isinstance(x, (list, tuple)):
 312         return None
 313     tally = {}
 314     for v in x:
 315         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 316         if s in tally:
 317             tally[s] += 1
 318         else:
 319             tally[s] = 1
 320     keys = sorted(tally.keys(), key=lambda k: tally[k], reverse=True)
 321     return {k: tally[k] for k in keys}
 322 
 323 
 324 def typeof(x: Any) -> str:
 325     return {
 326         type(None): 'null',
 327         bool: 'boolean',
 328         dict: 'object',
 329         float: 'number',
 330         int: 'number',
 331         str: 'string',
 332         list: 'array',
 333         tuple: 'array',
 334     }.get(type(x), 'other')
 335 
 336 
 337 def unique(x: Any) -> Any:
 338     if not isinstance(x, (list, tuple)):
 339         return x
 340     got = set()
 341     unique = []
 342     for v in x:
 343         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 344         if s in got:
 345             continue
 346         unique.append(v)
 347         got.add(s)
 348     return unique
 349 
 350 
 351 fallbacks: Dict[str, Callable] = {
 352     '.h': show_help,
 353     '.help': show_help,
 354     ':h': show_help,
 355     ':help': show_help,
 356     ':help:': show_help,
 357     '.i': info,
 358     '.info': info,
 359     ':i': info,
 360     ':info': info,
 361     ':info:': info,
 362     '.k': keys,
 363     '.keys': keys,
 364     ':keys': keys,
 365     ':keys:': keys,
 366     '.kind': typeof,
 367     ':kind': typeof,
 368     ':kind:': typeof,
 369     '.l': len,
 370     '.len': len,
 371     '.length': len,
 372     ':l': len,
 373     ':len': len,
 374     ':length': len,
 375     ':len:': len,
 376     ':length:': len,
 377     '.tally': tally,
 378     ':tally': tally,
 379     ':tally:': tally,
 380     '.t': typeof,
 381     '.type': typeof,
 382     ':t': typeof,
 383     ':type': typeof,
 384     ':type:': typeof,
 385     '.u': unique,
 386     '.unique': unique,
 387     ':u': unique,
 388     ':unique': unique,
 389     ':u:': unique,
 390     ':unique:': unique,
 391 }
 392 
 393 
 394 def pick(src: Any, keys: Tuple[str, ...]) -> Any:
 395     if isinstance(src, dict):
 396         picked = {}
 397         for k in keys:
 398             for k in match_keys(src, k):
 399                 picked[k] = src[k]
 400         return picked
 401 
 402     # if isinstance(src, (list, tuple)):
 403     #     picked = []
 404     #     for k in keys:
 405     #         for i in match_indices(src, k):
 406     #             picked.append(src[i])
 407     #     return tuple(picked)
 408 
 409     if isinstance(src, (list, tuple)):
 410         return tuple(pick(e, keys) for e in src if isinstance(e, dict))
 411 
 412     msg = f'can\'t pick properties from value of type {typeof(src)}'
 413     raise Exception(msg)
 414 
 415 
 416 def drop(src: Any, keys: Tuple[str, ...]) -> Any:
 417     if isinstance(src, dict):
 418         avoid = set()
 419         for k in keys:
 420             for k in match_keys(src, k):
 421                 avoid.add(k)
 422         return {k: v for k, v in src.items() if not k in avoid}
 423 
 424     # if isinstance(src, (list, tuple)):
 425     #     l = len(src)
 426     #     avoid = set()
 427     #     for k in keys:
 428     #         for i in match_indices(src, k):
 429     #             avoid.add(i if i >= 0 else i + l)
 430     #     return tuple(v for i, v in enumerate(src) if not i in avoid)
 431 
 432     if isinstance(src, (list, tuple)):
 433         return tuple(drop(e, keys) for e in src if isinstance(e, dict))
 434 
 435     msg = f'can\'t drop properties from value of type {typeof(src)}'
 436     raise Exception(msg)
 437 
 438 
 439 final_fallbacks: Dict[str, Callable] = {
 440     '+': pick,
 441     ':+:': pick,
 442     ':+': pick,
 443     '+:': pick,
 444     '/+': pick,
 445     '+/': pick,
 446 
 447     '-': drop,
 448     ':-:': drop,
 449     ':-': drop,
 450     '-:': drop,
 451     '/-': drop,
 452     '-/': drop,
 453 }
 454 
 455 
 456 # extra imports for the `python-lambda` option
 457 
 458 pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')
 459 
 460 if any(s in pyf for s in argv):
 461     import decimal
 462     import fractions
 463     import json
 464     import functools
 465     import itertools
 466     import math
 467     Math = math
 468     from math import \
 469         acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
 470         copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
 471         fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
 472         isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
 473         log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
 474         radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
 475     try:
 476         from math import cbrt, exp2
 477     except Exception:
 478         pass
 479     power = pow
 480     import operator
 481     import statistics
 482     import string
 483     import textwrap
 484     import time
 485     import urllib.parse
 486 
 487 
 488 apo = '\''
 489 apos = '\''
 490 backquote = '`'
 491 backtick = '`'
 492 ball = ''
 493 block = ''
 494 btick = '`'
 495 bullet = ''
 496 cdot = '·'
 497 circle = ''
 498 cross = '×'
 499 dquo = '"'
 500 dquote = '"'
 501 emdash = ''
 502 endash = ''
 503 ge = ''
 504 geq = ''
 505 hellip = ''
 506 hole = ''
 507 lcurly = '{'
 508 ldquo = ''
 509 ldquote = ''
 510 le = ''
 511 leq = ''
 512 mdash = ''
 513 mdot = '·'
 514 miniball = ''
 515 ndash = ''
 516 neq = ''
 517 rcurly = '}'
 518 rdquo = ''
 519 rdquote = ''
 520 sball = ''
 521 square = ''
 522 squo = '\''
 523 squote = '\''
 524 
 525 
 526 def dive(into: Any, doing: Callable) -> Any:
 527     'Transform a nested value by calling a func via depth-first recursion.'
 528 
 529     # support args in either order
 530     if callable(into):
 531         into, doing = doing, into
 532 
 533     return _dive_kv(None, into, doing)
 534 
 535 deepmap = dive
 536 dive1 = dive
 537 
 538 
 539 def divebin(x: Any, y: Any, doing: Callable) -> Any:
 540     'Nested 2-value version of depth-first-recursive func dive.'
 541 
 542     # support args in either order
 543     if callable(x):
 544         x, y, doing = y, doing, x
 545 
 546     narg = required_arg_count(doing)
 547     if narg == 2:
 548         return dive(x, lambda a: dive(y, lambda b: doing(a, b)))
 549     if narg == 4:
 550         return dive(x, lambda i, a: dive(y, lambda j, b: doing(i, a, j, b)))
 551     raise Exception('divebin(...) only supports funcs with 2 or 4 args')
 552 
 553 bindive = divebin
 554 # diveboth = divebin
 555 # dualdive = divebin
 556 # duodive = divebin
 557 dive2 = divebin
 558 
 559 
 560 def _dive_kv(key: Any, into: Any, doing: Callable) -> Any:
 561     if isinstance(into, dict):
 562         return {k: _dive_kv(k, v, doing) for k, v in into.items()}
 563     if isinstance(into, Iterable) and not isinstance(into, str):
 564         return [_dive_kv(i, e, doing) for i, e in enumerate(into)]
 565 
 566     narg = required_arg_count(doing)
 567     return doing(key, into) if narg == 2 else doing(into)
 568 
 569 
 570 def rescue(attempt: Callable, fallback: Any = None) -> Any:
 571     try:
 572         return attempt()
 573     except Exception as e:
 574         if callable(fallback):
 575             return fallback(e)
 576         return fallback
 577 
 578 catch = rescue
 579 recover = rescue
 580 rescued = rescue
 581 
 582 
 583 def required_arg_count(f: Callable) -> int:
 584     if isinstance(f, type):
 585         return 1
 586 
 587     meta = getfullargspec(f)
 588     n = len(meta.args)
 589     if meta.defaults:
 590         n -= len(meta.defaults)
 591     return n
 592 
 593 
 594 # deny file-access to expression-evaluators
 595 open = None
 596 
 597 try:
 598     # load data, trying to handle help-like options as well
 599     try:
 600         data = load(stdin.buffer)
 601     except Exception as e:
 602         if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
 603             show_help(None)
 604         else:
 605             raise e
 606 
 607     data = zoom(data, tuple(argv[1:]))
 608     dump(data, stdout, indent=2, separators=(',', ': '),
 609         allow_nan=False, check_circular=False)
 610     stdout.write('\n')
 611 except BrokenPipeError:
 612     # quit quietly, instead of showing a confusing error message
 613     stderr.close()
 614     exit(0)
 615 except KeyboardInterrupt:
 616     exit(2)
 617 except Exception as e:
 618     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 619     exit(1)