File: zj.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from inspect import getfullargspec
  27 from itertools import islice
  28 from json import load, dump, dumps
  29 from re import compile as compile_re
  30 from sys import argv, stderr, stdin, stdout
  31 from typing import Any, Callable, Dict, Iterable, Tuple
  32 
  33 
# help message: the script prints it (stripped of its leading/trailing
# whitespace) on standard-error, when its first command-line argument is
# one of the help options
info_msg = '''
zj [keys/indices...]


Zoom Json digs into a subset of valid JSON input, using the given mix of
keys and array-indices, the latter being either 0-based or negative, to
index backward from the ends of arrays.

Zooming on object keys is first tried as an exact key-match, failing that
as a case-insensitive key-match (first such match): when both approaches
fail, if the key is a valid integer, the key at the (even negative) index
given is used.

Invalid array-indices and missing object-keys result in null values, when
none of the special keys/fallbacks shown later apply.

You can slice arrays the exclusive/go/python way using index-pairs with a
`:` between the start/end pair, as long as it's a single argument; you can
even use `..` as the index-pair separator to include the stop index in the
result. Either way, as with go/python, you can omit either of the indices
when slicing.

Special key `.` acts as implicit loops on arrays, and even objects without
that specific key: in the unlikely case that an object has `.` as one of
its keys, you can use one of loop-fallback aliases, shown later.

Another special key is `+` (no quotes): when used, the rest of the keys
are used `in parallel`, allowing multiple picks from the current value.

Similar to `+`, the `-` fallback-key drops keys, which means all items are
picked, except for those mentioned after the `-`.

In case any of the special keys are actual keys in the data loaded, some
aliases are available:

    .   /.  ./  :.  .:
    +   /+  +/  :+  +:
    -   /-  -/  :-  -:

    .i   :i   .info    :info     :info:
    .k   :k   .keys    :keys     :keys:
    .l   :l   .len     .length   :len      :len:    :length    :length:
              .tally   :tally    :tally:
    .t   :t   .type    :type     :type:
    .u   :u   .unique  :unique   :unique:

These aliases allow using the special functionality even on objects whose
keys match some of these special names, as it's extremely unlikely data use
all aliases as actual keys at any level.

The only input supported is valid JSON coming from standard-input: there's
no way to load files using their names. To load data from files/URIs use
tools like `cat` or `curl`, and pipe their output into this tool.
'''
  88 
  89 slice_re = compile_re('''^(([+-]?[0-9]+)?)(:|\\.\\.)(([+-]?[0-9]+)?)$''')
  90 
  91 
  92 def zoom(data: Any, keys: Tuple[str, ...]) -> Any:
  93     eval_due = False
  94     pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')
  95 
  96     for i, k in enumerate(keys):
  97         try:
  98             if eval_due:
  99                 data = eval(k)(data)
 100                 eval_due = False
 101                 continue
 102 
 103             if isinstance(data, dict):
 104                 m = match_key(data, k)
 105                 if m in data:
 106                     data = data[m]
 107                     continue
 108                 m = slice_re.match(k)
 109                 if m:
 110                     data = {k: data[k] for k in match_keys(data, k)}
 111                     continue
 112 
 113             if isinstance(data, (list, tuple)):
 114                 try:
 115                     i = int(k)
 116                     l = len(data)
 117                     data = data[i] if -l <= i < l else None
 118                     continue
 119                 except Exception:
 120                     m = slice_re.match(k)
 121                     if m:
 122                         data = [data[i] for i in match_indices(data, k)]
 123                         continue
 124 
 125             if k in pyf:
 126                 eval_due = True
 127                 continue
 128 
 129             if k in ('.', '/.', './', ':.', '.:'):
 130                 if isinstance(data, dict):
 131                     rest = tuple(keys[i + 1:])
 132                     return {k: zoom(v, rest) for k, v in data.items()}
 133                 if isinstance(data, (list, tuple)):
 134                     rest = tuple(keys[i + 1:])
 135                     return tuple(zoom(v, rest) for v in data)
 136 
 137                 # doing nothing amounts to an identity-op for simple values
 138                 continue
 139 
 140             fn = final_fallbacks.get(k, None)
 141             if fn:
 142                 return fn(data, tuple(keys[i + 1:]))
 143 
 144             fn = fallbacks.get(k, None)
 145             if fn:
 146                 data = fn(data)
 147                 continue
 148 
 149             if isinstance(data, (dict, list, tuple)):
 150                 data = None
 151                 continue
 152 
 153             kind = typeof(data)
 154             msg = f'value of type {kind} has no properties to zoom into'
 155             raise Exception(msg)
 156         except Exception as e:
 157             key_path = ' > '.join(islice(keys, None, i + 1))
 158             raise Exception(f'{key_path}: {e}')
 159 
 160     return data
 161 
 162 def match_key(src: Dict, key: str) -> str:
 163     if key in src:
 164         return key
 165 
 166     low = key.casefold()
 167     for k in src.keys():
 168         if low == k.casefold():
 169             return k
 170 
 171     try:
 172         i = int(key)
 173         l = len(src)
 174         if i < 0:
 175             i += l
 176         if i < 0 or i >= l:
 177             return None
 178         for j, k in enumerate(src.keys()):
 179             if i == j:
 180                 return k
 181     except Exception:
 182         return key
 183 
 184     return key
 185 
 186 def match_keys(src: Any, key: str) -> Iterable:
 187     if isinstance(src, (list, tuple)):
 188         yield from match_indices(src, key)
 189         yield from match_fallbacks(src, key)
 190         return
 191 
 192     if isinstance(src, dict):
 193         if key in src:
 194             yield key
 195             return
 196 
 197         low = key.casefold()
 198         for k in src.keys():
 199             if low == k.casefold():
 200                 yield k
 201                 return
 202 
 203         yield from match_indices(src, key)
 204         yield from match_fallbacks(src, key)
 205         return
 206 
 207     yield from match_fallbacks(src, key)
 208 
 209 def match_indices(src: Any, key: str) -> Iterable:
 210     try:
 211         i = int(key)
 212 
 213         if isinstance(src, (list, tuple)):
 214             l = len(src)
 215             yield src[i] if -l <= i < l else None
 216             return
 217 
 218         if isinstance(src, dict):
 219             l = len(src)
 220             if i < 0:
 221                 i += l
 222             if i < 0 or i >= l:
 223                 return
 224 
 225             for j, k in enumerate(src.keys()):
 226                 if i == j:
 227                     yield k
 228                     return
 229 
 230         return
 231     except Exception:
 232         pass
 233 
 234     m = slice_re.match(key)
 235     if not m:
 236         return
 237 
 238     l = len(src)
 239 
 240     (start, _, kind, stop, _) = m.groups()
 241     start = int(start) if start != '' else 0
 242     stop = int(stop) if stop != '' else l
 243 
 244     if start < 0:
 245         start += l
 246     start = max(start, 0)
 247     if stop < 0:
 248         stop += l
 249     stop = min(stop, l)
 250     if kind == '..':
 251         stop += 1
 252     stop = min(stop, l)
 253 
 254     if start > stop:
 255         return
 256     if (start < 0 and stop < 0) or (start >= l and stop >= l):
 257         return
 258 
 259     if isinstance(src, dict):
 260         for i, k in enumerate(src.keys()):
 261             if i >= stop:
 262                 return
 263             if start <= i:
 264                 yield k
 265         return
 266 
 267     if isinstance(src, (list, tuple)):
 268         yield from range(start, stop)
 269         return
 270 
 271 def match_fallbacks(src: Any, key: str) -> Iterable:
 272     fn = fallbacks.get(key, None)
 273     if fn:
 274         yield fn(src)
 275 
 276 def keys(src: Any) -> Any:
 277     if isinstance(src, dict):
 278         return tuple(src.keys())
 279     if isinstance(src, (list, tuple)):
 280         return tuple(range(len(src)))
 281     return None
 282 
 283 def info(x: Any) -> str:
 284     if isinstance(x, dict):
 285         return f'object ({len(x)} items)'
 286     if isinstance(x, (list, tuple)):
 287         return f'array ({len(x)} items)'
 288     return typeof(x)
 289 
 290 def tally(x: Any) -> Any:
 291     if not isinstance(x, (list, tuple)):
 292         return None
 293 
 294     tally = {}
 295     for v in x:
 296         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 297         if s in tally:
 298             tally[s] += 1
 299         else:
 300             tally[s] = 1
 301     keys = sorted(tally.keys(), key=lambda k: tally[k], reverse=True)
 302     return {k: tally[k] for k in keys}
 303 
 304 def typeof(x: Any) -> str:
 305     return {
 306         type(None): 'null',
 307         bool: 'boolean',
 308         dict: 'object',
 309         float: 'number',
 310         int: 'number',
 311         str: 'string',
 312         list: 'array',
 313         tuple: 'array',
 314     }.get(type(x), 'other')
 315 
 316 def unique(x: Any) -> Any:
 317     if not isinstance(x, (list, tuple)):
 318         return x
 319 
 320     got = set()
 321     unique = []
 322     for v in x:
 323         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 324         if s in got:
 325             continue
 326         unique.append(v)
 327         got.add(s)
 328     return unique
 329 
 330 
 331 fallbacks: Dict[str, Callable] = {
 332     '.i': info,
 333     '.info': info,
 334     ':i': info,
 335     ':info': info,
 336     ':info:': info,
 337     '.k': keys,
 338     '.keys': keys,
 339     ':keys': keys,
 340     ':keys:': keys,
 341     '.kind': typeof,
 342     ':kind': typeof,
 343     ':kind:': typeof,
 344     '.l': len,
 345     '.len': len,
 346     '.length': len,
 347     ':l': len,
 348     ':len': len,
 349     ':length': len,
 350     ':len:': len,
 351     ':length:': len,
 352     '.tally': tally,
 353     ':tally': tally,
 354     ':tally:': tally,
 355     '.t': typeof,
 356     '.type': typeof,
 357     ':t': typeof,
 358     ':type': typeof,
 359     ':type:': typeof,
 360     '.u': unique,
 361     '.unique': unique,
 362     ':u': unique,
 363     ':unique': unique,
 364     ':u:': unique,
 365     ':unique:': unique,
 366 }
 367 
 368 
 369 def pick(src: Any, keys: Tuple[str, ...]) -> Any:
 370     if isinstance(src, dict):
 371         picked = {}
 372         for k in keys:
 373             for k in match_keys(src, k):
 374                 picked[k] = src[k]
 375         return picked
 376 
 377     # if isinstance(src, (list, tuple)):
 378     #     picked = []
 379     #     for k in keys:
 380     #         for i in match_indices(src, k):
 381     #             picked.append(src[i])
 382     #     return tuple(picked)
 383 
 384     if isinstance(src, (list, tuple)):
 385         return tuple(pick(e, keys) for e in src if isinstance(e, dict))
 386 
 387     msg = f'can\'t pick properties from value of type {typeof(src)}'
 388     raise Exception(msg)
 389 
 390 def drop(src: Any, keys: Tuple[str, ...]) -> Any:
 391     if isinstance(src, dict):
 392         avoid = set()
 393         for k in keys:
 394             for k in match_keys(src, k):
 395                 avoid.add(k)
 396         return {k: v for k, v in src.items() if not k in avoid}
 397 
 398     # if isinstance(src, (list, tuple)):
 399     #     l = len(src)
 400     #     avoid = set()
 401     #     for k in keys:
 402     #         for i in match_indices(src, k):
 403     #             avoid.add(i if i >= 0 else i + l)
 404     #     return tuple(v for i, v in enumerate(src) if not i in avoid)
 405 
 406     if isinstance(src, (list, tuple)):
 407         return tuple(drop(e, keys) for e in src if isinstance(e, dict))
 408 
 409     msg = f'can\'t drop properties from value of type {typeof(src)}'
 410     raise Exception(msg)
 411 
 412 
 413 final_fallbacks: Dict[str, Callable] = {
 414     '+': pick,
 415     ':+:': pick,
 416     ':+': pick,
 417     '+:': pick,
 418     '/+': pick,
 419     '+/': pick,
 420 
 421     '-': drop,
 422     ':-:': drop,
 423     ':-': drop,
 424     '-:': drop,
 425     '/-': drop,
 426     '-/': drop,
 427 }
 428 
 429 
# extra imports for the `python-lambda` option: these only happen when any
# of the command-line arguments is one of the python-func aliases below, and
# all names imported become module-level globals

# aliases announcing the argument after them is a python expression
pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')

if any(s in pyf for s in argv):
    import decimal
    import fractions
    import json
    import functools
    import itertools
    import math
    # capitalized alias for the math module
    Math = math
    # math funcs/constants are also available without the module prefix;
    # note `pow` here replaces the built-in func of the same name
    from math import \
        acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
        copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
        fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
        isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
        log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
        radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
    # failing to import these 2 funcs is ignored, presumably because older
    # versions of python lack them; TODO confirm
    try:
        from math import cbrt, exp2
    except Exception:
        pass
    power = pow
    import operator
    import statistics
    import string
    import textwrap
    import time
    import urllib.parse
 460 
 461 
 462 apo = '\''
 463 apos = '\''
 464 backquote = '`'
 465 backtick = '`'
 466 ball = ''
 467 block = ''
 468 btick = '`'
 469 bullet = ''
 470 cdot = '·'
 471 circle = ''
 472 cross = '×'
 473 dquo = '"'
 474 dquote = '"'
 475 emdash = ''
 476 endash = ''
 477 ge = ''
 478 geq = ''
 479 hellip = ''
 480 hole = ''
 481 lcurly = '{'
 482 ldquo = ''
 483 ldquote = ''
 484 le = ''
 485 leq = ''
 486 mdash = ''
 487 mdot = '·'
 488 miniball = ''
 489 ndash = ''
 490 neq = ''
 491 rcurly = '}'
 492 rdquo = ''
 493 rdquote = ''
 494 sball = ''
 495 square = ''
 496 squo = '\''
 497 squote = '\''
 498 
 499 
 500 def dive(into: Any, doing: Callable) -> Any:
 501     'Transform a nested value by calling a func via depth-first recursion.'
 502 
 503     # support args in either order
 504     if callable(into):
 505         into, doing = doing, into
 506 
 507     return _dive_kv(None, into, doing)
 508 
 509 deepmap = dive1 = dive
 510 
 511 def divebin(x: Any, y: Any, doing: Callable) -> Any:
 512     'Nested 2-value version of depth-first-recursive func dive.'
 513 
 514     # support args in either order
 515     if callable(x):
 516         x, y, doing = y, doing, x
 517 
 518     narg = required_arg_count(doing)
 519     if narg == 2:
 520         return dive(x, lambda a: dive(y, lambda b: doing(a, b)))
 521     if narg == 4:
 522         return dive(x, lambda i, a: dive(y, lambda j, b: doing(i, a, j, b)))
 523     raise Exception('divebin(...) only supports funcs with 2 or 4 args')
 524 
 525 bindive = dive2 = divebin
 526 
 527 def _dive_kv(key: Any, into: Any, doing: Callable) -> Any:
 528     if isinstance(into, dict):
 529         return {k: _dive_kv(k, v, doing) for k, v in into.items()}
 530     if isinstance(into, Iterable) and not isinstance(into, str):
 531         return [_dive_kv(i, e, doing) for i, e in enumerate(into)]
 532 
 533     narg = required_arg_count(doing)
 534     return doing(key, into) if narg == 2 else doing(into)
 535 
 536 def rescue(attempt: Callable, fallback: Any = None) -> Any:
 537     try:
 538         return attempt()
 539     except Exception as e:
 540         if callable(fallback):
 541             return fallback(e)
 542         return fallback
 543 
 544 catch = recover = recovered = rescued = rescue
 545 
 546 def required_arg_count(f: Callable) -> int:
 547     if isinstance(f, type):
 548         return 1
 549 
 550     meta = getfullargspec(f)
 551     n = len(meta.args)
 552     if meta.defaults:
 553         n -= len(meta.defaults)
 554     return n
 555 
 556 
 557 # deny file-access to expression-evaluators
 558 open = None
 559 
 560 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
 561     print(info_msg.strip(), file=stderr)
 562     exit(0)
 563 
 564 compact_opts = (
 565     '-c', '--c', '-compact', '--compact', '-j0', '--j0', '-json0', '--json0',
 566 )
 567 
 568 args = argv[1:]
 569 compact = False
 570 
 571 while len(args) > 0:
 572     if args[0] == '--':
 573         args = args[1:]
 574         break
 575 
 576     if args[0] in compact_opts:
 577         compact = True
 578         args = args[1:]
 579         continue
 580 
 581     break
 582 
 583 try:
 584     data = load(stdin.buffer)
 585     data = zoom(data, tuple(args))
 586     v, w = data, stdout
 587     i = None if compact else 2
 588     s = (',', ':') if compact else (',', ': ')
 589     dump(v, w, indent=i, separators=s, allow_nan=False, check_circular=False)
 590     stdout.write('\n')
 591 except BrokenPipeError:
 592     # quit quietly, instead of showing a confusing error message
 593     stderr.close()
 594     exit(0)
 595 except KeyboardInterrupt:
 596     exit(2)
 597 except Exception as e:
 598     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 599     exit(1)