File: zj.py
   1 #!/usr/bin/python
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright (c) 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from inspect import getfullargspec
  27 from itertools import islice
  28 from json import load, dump, dumps
  29 from re import compile as compile_re
  30 from sys import argv, stderr, stdin, stdout
  31 from typing import Any, Callable, Dict, Iterable, List, Tuple
  32 
  33 
# help message shown when one of the help options is the first argument: it's
# stripped of its leading/trailing whitespace right before being printed
info_msg = '''
zj [options...] [keys / indices / operations...]


Zoom Json digs into a subset of valid JSON input, using the given mix of
keys and array-indices, the latter being either 0-based or negative, to
index backward from the ends of arrays.

Zooming on object keys is first tried as an exact key-match, failing that
as a case-insensitive key-match (first such match): when both approaches
fail, if the key is a valid integer, the key at the (even negative) index
given is used.

Invalid array-indices and missing object-keys result in null values, when
none of the special keys/fallbacks shown later apply.

You can slice arrays the exclusive/go/python way using index-pairs with a
`:` between the start/end pair, as long as it's a single argument; you can
even use `..` as the index-pair separator to include the stop index in the
result. Either way, as with go/python, you can omit either of the indices
when slicing.

Special key `.` acts as implicit loops on arrays, and even objects without
that specific key: in the unlikely case that an object has `.` as one of
its keys, you can use one of loop-fallback aliases, shown later.

Special key `,` evaluates all arguments left in parallel, splitting them
using a comma: the result is an array whose items are zoomed-in separately
using each comma-split sub-sequence. If any of the aliases for `,` is used,
the split-symbol will have to be the same as the specific alias used.

Another special key is `+` (no quotes): when used, the rest of the keys
are used `in parallel`, allowing multiple picks from the current value.

Similar to `+`, the `-` fallback-key drops keys, which means all items are
picked, except for those mentioned after the `-`.

In case any of the special keys are actual keys in the data loaded, some
aliases are available:

    .   /.  ./  :.  .:
    ,   /,  ,/  :,  ,:
    +   /+  +/  :+  +:
    -   /-  -/  :-  -:

    .i   :i   .info    :info     :info:
    .k   :k   .keys    :keys     :keys:
    .l   :l   .len     .length   :len      :len:    :length    :length:
              .tally   :tally    :tally:
    .t   :t   .type    :type     :type:
    .u   :u   .unique  :unique   :unique:

These aliases allow using the special functionality even on objects whose
keys match some of these special names, as it's extremely unlikely data use
all aliases as actual keys at any level.

The only input supported is valid JSON coming from standard-input: there's
no way to load files using their names. To load data from files/URIs use
tools like `cat` or `curl`, and pipe their output into this tool.

The options are, available both in single and double-dash versions

    -h, -help                    show this help message
    -c, -compact, -j0, -json0    emit result as squished single-line JSON
'''
  99 
 100 slice_re = compile_re('''^(([+-]?[0-9]+)?)(:|\\.\\.)(([+-]?[0-9]+)?)$''')
 101 
 102 
 103 def zoom(data: Any, keys: Tuple[str, ...]) -> Any:
 104     eval_due = False
 105     pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')
 106 
 107     for i, k in enumerate(keys):
 108         try:
 109             if eval_due:
 110                 data = eval(k)(data)
 111                 eval_due = False
 112                 continue
 113 
 114             if isinstance(data, dict):
 115                 m = match_key(data, k)
 116                 if m in data:
 117                     data = data[m]
 118                     continue
 119                 m = slice_re.match(k)
 120                 if m:
 121                     data = {k: data[k] for k in match_keys(data, k)}
 122                     continue
 123 
 124             if isinstance(data, (list, tuple)):
 125                 try:
 126                     i = int(k)
 127                     l = len(data)
 128                     data = data[i] if -l <= i < l else None
 129                     continue
 130                 except Exception:
 131                     m = slice_re.match(k)
 132                     if m:
 133                         data = [data[i] for i in match_indices(data, k)]
 134                         continue
 135 
 136             if k in pyf:
 137                 eval_due = True
 138                 continue
 139 
 140             if k in ('.', '/.', './', ':.', '.:'):
 141                 if isinstance(data, dict):
 142                     rest = tuple(keys[i + 1:])
 143                     return {k: zoom(v, rest) for k, v in data.items()}
 144                 if isinstance(data, (list, tuple)):
 145                     rest = tuple(keys[i + 1:])
 146                     return tuple(zoom(v, rest) for v in data)
 147 
 148                 # doing nothing amounts to an identity-op for simple values
 149                 continue
 150 
 151             if k in (',', '/,', ',/', ':,', ',:'):
 152                 if isinstance(data, (dict, list, tuple)):
 153                     sub = split(keys[i + 1:], k)
 154                     return [zoom(data, rest) for rest in sub]
 155 
 156                 # doing nothing amounts to an identity-op for simple values
 157                 continue
 158 
 159             fn = final_fallbacks.get(k, None)
 160             if fn:
 161                 return fn(data, tuple(keys[i + 1:]))
 162 
 163             fn = fallbacks.get(k, None)
 164             if fn:
 165                 data = fn(data)
 166                 continue
 167 
 168             if isinstance(data, (dict, list, tuple)):
 169                 data = None
 170                 continue
 171 
 172             kind = typeof(data)
 173             msg = f'value of type {kind} has no properties to zoom into'
 174             raise Exception(msg)
 175         except Exception as e:
 176             key_path = ' > '.join(islice(keys, None, i + 1))
 177             raise Exception(f'{key_path}: {e}')
 178 
 179     return data
 180 
 181 def match_key(src: Dict, key: str) -> str:
 182     if key in src:
 183         return key
 184 
 185     low = key.casefold()
 186     for k in src.keys():
 187         if low == k.casefold():
 188             return k
 189 
 190     try:
 191         i = int(key)
 192         l = len(src)
 193         if i < 0:
 194             i += l
 195         if i < 0 or i >= l:
 196             return None
 197         for j, k in enumerate(src.keys()):
 198             if i == j:
 199                 return k
 200     except Exception:
 201         return key
 202 
 203     return key
 204 
 205 def match_keys(src: Any, key: str) -> Iterable:
 206     if isinstance(src, (list, tuple)):
 207         yield from match_indices(src, key)
 208         yield from match_fallbacks(src, key)
 209         return
 210 
 211     if isinstance(src, dict):
 212         if key in src:
 213             yield key
 214             return
 215 
 216         low = key.casefold()
 217         for k in src.keys():
 218             if low == k.casefold():
 219                 yield k
 220                 return
 221 
 222         yield from match_indices(src, key)
 223         yield from match_fallbacks(src, key)
 224         return
 225 
 226     yield from match_fallbacks(src, key)
 227 
 228 def match_indices(src: Any, key: str) -> Iterable:
 229     try:
 230         i = int(key)
 231 
 232         if isinstance(src, (list, tuple)):
 233             l = len(src)
 234             yield src[i] if -l <= i < l else None
 235             return
 236 
 237         if isinstance(src, dict):
 238             l = len(src)
 239             if i < 0:
 240                 i += l
 241             if i < 0 or i >= l:
 242                 return
 243 
 244             for j, k in enumerate(src.keys()):
 245                 if i == j:
 246                     yield k
 247                     return
 248 
 249         return
 250     except Exception:
 251         pass
 252 
 253     m = slice_re.match(key)
 254     if not m:
 255         return
 256 
 257     l = len(src)
 258 
 259     (start, _, kind, stop, _) = m.groups()
 260     start = int(start) if start != '' else 0
 261     stop = int(stop) if stop != '' else l
 262 
 263     if start < 0:
 264         start += l
 265     start = max(start, 0)
 266     if stop < 0:
 267         stop += l
 268     stop = min(stop, l)
 269     if kind == '..':
 270         stop += 1
 271     stop = min(stop, l)
 272 
 273     if start > stop:
 274         return
 275     if (start < 0 and stop < 0) or (start >= l and stop >= l):
 276         return
 277 
 278     if isinstance(src, dict):
 279         for i, k in enumerate(src.keys()):
 280             if i >= stop:
 281                 return
 282             if start <= i:
 283                 yield k
 284         return
 285 
 286     if isinstance(src, (list, tuple)):
 287         yield from range(start, stop)
 288         return
 289 
 290 def match_fallbacks(src: Any, key: str) -> Iterable:
 291     fn = fallbacks.get(key, None)
 292     if fn:
 293         yield fn(src)
 294 
 295 def split(src: Iterable, by: Any) -> List:
 296     split = []
 297     latest = []
 298 
 299     for e in src:
 300         if e == by:
 301             split.append(latest)
 302             latest = []
 303         else:
 304             latest.append(e)
 305 
 306     if len(latest) > 0:
 307         split.append(latest)
 308     return split
 309 
 310 def keys(src: Any) -> Any:
 311     if isinstance(src, dict):
 312         return tuple(src.keys())
 313     if isinstance(src, (list, tuple)):
 314         return tuple(range(len(src)))
 315     return None
 316 
 317 def info(x: Any) -> str:
 318     if isinstance(x, dict):
 319         return f'object ({len(x)} items)'
 320     if isinstance(x, (list, tuple)):
 321         return f'array ({len(x)} items)'
 322     return typeof(x)
 323 
 324 def sort_keys(kv: Dict) -> Dict:
 325     sorted_keys = sorted(kv.keys())
 326     return {k: kv[k] for k in sorted_keys}
 327 
 328 def tally(x: Any) -> Any:
 329     if not isinstance(x, (list, tuple)):
 330         return None
 331 
 332     tally = {}
 333     for v in x:
 334         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 335         if s in tally:
 336             tally[s] += 1
 337         else:
 338             tally[s] = 1
 339     keys = sorted(tally.keys(), key=lambda k: tally[k], reverse=True)
 340     return {k: tally[k] for k in keys}
 341 
 342 def typeof(x: Any) -> str:
 343     return {
 344         type(None): 'null',
 345         bool: 'boolean',
 346         dict: 'object',
 347         float: 'number',
 348         int: 'number',
 349         str: 'string',
 350         list: 'array',
 351         tuple: 'array',
 352     }.get(type(x), 'other')
 353 
 354 def unique(x: Any) -> Any:
 355     if not isinstance(x, (list, tuple)):
 356         return x
 357 
 358     got = set()
 359     unique = []
 360     for v in x:
 361         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 362         if s in got:
 363             continue
 364         unique.append(v)
 365         got.add(s)
 366     return unique
 367 
 368 
 369 fallbacks: Dict[str, Callable] = {
 370     '.i': info,
 371     '.info': info,
 372     ':i': info,
 373     ':info': info,
 374     ':info:': info,
 375     '.k': keys,
 376     '.keys': keys,
 377     ':keys': keys,
 378     ':keys:': keys,
 379     '.kind': typeof,
 380     ':kind': typeof,
 381     ':kind:': typeof,
 382     '.l': len,
 383     '.len': len,
 384     '.length': len,
 385     ':l': len,
 386     ':len': len,
 387     ':length': len,
 388     ':len:': len,
 389     ':length:': len,
 390     '.sortkeys': sort_keys,
 391     ':sortkeys': sort_keys,
 392     ':sortkeys:': sort_keys,
 393     '.tally': tally,
 394     ':tally': tally,
 395     ':tally:': tally,
 396     '.t': typeof,
 397     '.type': typeof,
 398     ':t': typeof,
 399     ':type': typeof,
 400     ':type:': typeof,
 401     '.u': unique,
 402     '.unique': unique,
 403     ':u': unique,
 404     ':unique': unique,
 405     ':u:': unique,
 406     ':unique:': unique,
 407 }
 408 
 409 
 410 def pick(src: Any, keys: Tuple[str, ...]) -> Any:
 411     if isinstance(src, dict):
 412         picked = {}
 413         for k in keys:
 414             for k in match_keys(src, k):
 415                 picked[k] = src[k]
 416         return picked
 417 
 418     # if isinstance(src, (list, tuple)):
 419     #     picked = []
 420     #     for k in keys:
 421     #         for i in match_indices(src, k):
 422     #             picked.append(src[i])
 423     #     return tuple(picked)
 424 
 425     if isinstance(src, (list, tuple)):
 426         return tuple(pick(e, keys) for e in src if isinstance(e, dict))
 427 
 428     msg = f'can\'t pick properties from value of type {typeof(src)}'
 429     raise Exception(msg)
 430 
 431 def drop(src: Any, keys: Tuple[str, ...]) -> Any:
 432     if isinstance(src, dict):
 433         avoid = set()
 434         for k in keys:
 435             for k in match_keys(src, k):
 436                 avoid.add(k)
 437         return {k: v for k, v in src.items() if not k in avoid}
 438 
 439     # if isinstance(src, (list, tuple)):
 440     #     l = len(src)
 441     #     avoid = set()
 442     #     for k in keys:
 443     #         for i in match_indices(src, k):
 444     #             avoid.add(i if i >= 0 else i + l)
 445     #     return tuple(v for i, v in enumerate(src) if not i in avoid)
 446 
 447     if isinstance(src, (list, tuple)):
 448         return tuple(drop(e, keys) for e in src if isinstance(e, dict))
 449 
 450     msg = f'can\'t drop properties from value of type {typeof(src)}'
 451     raise Exception(msg)
 452 
 453 
 454 final_fallbacks: Dict[str, Callable] = {
 455     '+': pick,
 456     ':+:': pick,
 457     ':+': pick,
 458     '+:': pick,
 459     '/+': pick,
 460     '+/': pick,
 461 
 462     '-': drop,
 463     ':-:': drop,
 464     ':-': drop,
 465     '-:': drop,
 466     '/-': drop,
 467     '-/': drop,
 468 }
 469 
 470 
# extra imports for the `python-lambda` option: none of these names are used
# by the script itself, they're only brought into the module's namespace,
# which is what the expressions evaluated by func zoom get to see

# special keys which make func zoom evaluate the key right after them as a
# python expression
pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')

# skip all these imports, unless any of the args is one of those keys
if any(s in pyf for s in argv):
    import decimal
    import fractions
    import json
    import functools
    import itertools
    import math
    # capitalized alias for the math module
    Math = math
    from math import \
        acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
        copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
        fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
        isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
        log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
        radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
    # failing to import these is tolerated, presumably because some python
    # versions lack them
    try:
        from math import cbrt, exp2
    except Exception:
        pass
    # pow is the one imported from math just above, not the builtin func
    power = pow
    import operator
    import statistics
    import string
    import textwrap
    import time
    import urllib.parse
 501 
 502 
 503 apo = '\''
 504 apos = '\''
 505 backquote = '`'
 506 backtick = '`'
 507 ball = ''
 508 block = ''
 509 btick = '`'
 510 bullet = ''
 511 cdot = '·'
 512 circle = ''
 513 cross = '×'
 514 dquo = '"'
 515 dquote = '"'
 516 emdash = ''
 517 endash = ''
 518 ge = ''
 519 geq = ''
 520 hellip = ''
 521 hole = ''
 522 lcurly = '{'
 523 ldquo = ''
 524 ldquote = ''
 525 le = ''
 526 leq = ''
 527 mdash = ''
 528 mdot = '·'
 529 miniball = ''
 530 ndash = ''
 531 neq = ''
 532 rcurly = '}'
 533 rdquo = ''
 534 rdquote = ''
 535 sball = ''
 536 square = ''
 537 squo = '\''
 538 squote = '\''
 539 
 540 
 541 def dive(into: Any, doing: Callable) -> Any:
 542     'Transform a nested value by calling a func via depth-first recursion.'
 543 
 544     # support args in either order
 545     if callable(into):
 546         into, doing = doing, into
 547 
 548     return _dive_kv(None, into, doing)
 549 
 550 deepmap = dive1 = dive
 551 
 552 def divebin(x: Any, y: Any, doing: Callable) -> Any:
 553     'Nested 2-value version of depth-first-recursive func dive.'
 554 
 555     # support args in either order
 556     if callable(x):
 557         x, y, doing = y, doing, x
 558 
 559     narg = required_arg_count(doing)
 560     if narg == 2:
 561         return dive(x, lambda a: dive(y, lambda b: doing(a, b)))
 562     if narg == 4:
 563         return dive(x, lambda i, a: dive(y, lambda j, b: doing(i, a, j, b)))
 564     raise Exception('divebin(...) only supports funcs with 2 or 4 args')
 565 
 566 bindive = dive2 = divebin
 567 
 568 def _dive_kv(key: Any, into: Any, doing: Callable) -> Any:
 569     if isinstance(into, dict):
 570         return {k: _dive_kv(k, v, doing) for k, v in into.items()}
 571     if isinstance(into, Iterable) and not isinstance(into, str):
 572         return [_dive_kv(i, e, doing) for i, e in enumerate(into)]
 573 
 574     narg = required_arg_count(doing)
 575     return doing(key, into) if narg == 2 else doing(into)
 576 
 577 def rescue(attempt: Callable, fallback: Any = None) -> Any:
 578     try:
 579         return attempt()
 580     except Exception as e:
 581         if callable(fallback):
 582             return fallback(e)
 583         return fallback
 584 
 585 rescued = rescue
 586 
 587 def required_arg_count(f: Callable) -> int:
 588     if isinstance(f, type):
 589         return 1
 590 
 591     meta = getfullargspec(f)
 592     n = len(meta.args)
 593     if meta.defaults:
 594         n -= len(meta.defaults)
 595     return n
 596 
 597 
# deny file-access to expression-evaluators
# NOTE(review): this only hides the name `open` at module level, which isn't
# a real sandbox, since evaluated expressions can presumably still reach
# files in other ways; confirm that's acceptable
open = None

# help options only work as the first argument: the help message goes to
# standard error, and asking for it counts as a success
if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
    print(info_msg.strip(), file=stderr)
    exit(0)

# all options asking for single-line JSON output
compact_opts = (
    '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
)

# args ends up with only the keys to zoom with, after all leading options
# are handled
args = argv[1:]
compact = False

while len(args) > 0:
    # a double-dash ends the options, so that anything after it is a key,
    # even when it looks like an option
    if args[0] == '--':
        args = args[1:]
        break

    if args[0] in compact_opts:
        compact = True
        args = args[1:]
        continue

    # the first argument which isn't an option also ends the options
    break
 623 
# load all of standard input as JSON, zoom into it using the keys given, and
# emit the result as JSON on standard output, followed by a line-feed
try:
    # these names are module-level ones, which makes them visible to the
    # python expressions func zoom evaluates
    data = load(stdin.buffer)
    data = zoom(data, tuple(args))
    v, w = data, stdout
    # output is indented using 2 spaces, unless it's meant to be compact
    i = None if compact else 2
    # compact output has no spaces after its commas and colons
    s = (',', ':') if compact else (',', ': ')
    dump(v, w, indent=i, separators=s, allow_nan=False, check_circular=False)
    stdout.write('\n')
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
    exit(0)
except KeyboardInterrupt:
    # quit with exit-code 2, without showing anything
    exit(2)
except Exception as e:
    # show just the error message, and quit with exit-code 1
    print(str(e), file=stderr)
    exit(1)
 640     exit(1)