File: zj.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from inspect import getfullargspec
  27 from itertools import islice
  28 from json import load, dump, dumps
  29 from sys import argv, stderr, stdin, stdout
  30 from typing import Any, Callable, Dict, Iterable, NoReturn, Tuple
  31 
  32 
  33 from re import compile as compile_re
  34 
  35 
# Usage/help text for the tool: func show_help prints it, stripped of its
# leading/trailing whitespace, to standard-error.
info_msg = '''
zj [keys/indices...]


Zoom Json digs into a subset of valid JSON input, using the given mix of
keys and array-indices, the latter being either 0-based or negative, to
index backward from the ends of arrays.

Zooming on object keys is first tried as an exact key-match, failing that
as a case-insensitive key-match (first such match): when both approaches
fail, if the key is a valid integer, the key at the (even negative) index
given is used.

Invalid array-indices and missing object-keys result in null values, when
none of the special keys/fallbacks shown later apply.

You can slice arrays the exclusive/go/python way using index-pairs with a
`:` between the start/end pair, as long as it's a single argument; you can
even use `..` as the index-pair separator to include the stop index in the
result. Either way, as with go/python, you can omit either of the indices
when slicing.

Special key `.` acts as implicit loops on arrays, and even objects without
that specific key: in the unlikely case that an object has `.` as one of
its keys, you can use one of loop-fallback aliases, shown later.

Another special key is `+` (no quotes): when used, the rest of the keys
are used `in parallel`, allowing multiple picks from the current value.
When picking array items, you can also use either type (`:` or `..`) of
slicing, even mixing it with individual indices.

Similar to `+`, the `-` fallback-key drops keys, which means all items are
picked, except for those mentioned after the `-`.

Unlike the looping special key, after the first `+` special-key, all keys
following it, special or not, are picked normally.

In case any of the special keys are actual keys in the data loaded, some
aliases are available:

    .   /.  ./  :.  .:
    +   /+  +/  :+  +:
    -   /-  -/  :-  -:

    .i   :i   .info    :info     :info:
    .k   :k   .keys    :keys     :keys:
    .t   :t   .type    :type     :type:
    .l   :l   .len     .length   :len      :len:    :length    :length:

These aliases allow using the special functionality even on objects whose
keys match some of these special names, as it's extremely unlikely data use
all aliases as actual keys at any level.

The only input supported is valid JSON coming from standard-input: there's
no way to load files using their names. To load data from files/URIs use
tools like `cat` or `curl`, and pipe their output into this tool.
'''
  93 
  94 slice_re = compile_re('''^(([+-]?[0-9]+)?)(:|\.\.)(([+-]?[0-9]+)?)$''')
  95 
  96 
  97 def zoom(data: Any, keys: Tuple[str, ...]) -> Any:
  98     eval_due = False
  99     pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')
 100 
 101     for i, k in enumerate(keys):
 102         try:
 103             if eval_due:
 104                 data = eval(k)(data)
 105                 eval_due = False
 106                 continue
 107 
 108             if isinstance(data, dict):
 109                 m = match_key(data, k)
 110                 if m in data:
 111                     data = data[m]
 112                     continue
 113                 m = slice_re.match(k)
 114                 if m:
 115                     data = {k: data[k] for k in match_keys(data, k)}
 116                     continue
 117 
 118             if isinstance(data, (list, tuple)):
 119                 try:
 120                     i = int(k)
 121                     l = len(data)
 122                     data = data[i] if -l <= i < l else None
 123                     continue
 124                 except Exception:
 125                     m = slice_re.match(k)
 126                     if m:
 127                         data = [data[i] for i in match_indices(data, k)]
 128                         continue
 129 
 130             if k in pyf:
 131                 eval_due = True
 132                 continue
 133 
 134             if k in ('.', '/.', './', ':.', '.:'):
 135                 if isinstance(data, dict):
 136                     rest = tuple(keys[i + 1:])
 137                     return {k: zoom(v, rest) for k, v in data.items()}
 138                 if isinstance(data, (list, tuple)):
 139                     rest = tuple(keys[i + 1:])
 140                     return tuple(zoom(v, rest) for v in data)
 141 
 142                 # doing nothing amounts to an identity-op for simple values
 143                 continue
 144 
 145             fn = final_fallbacks.get(k, None)
 146             if fn:
 147                 return fn(data, tuple(keys[i + 1:]))
 148 
 149             fn = fallbacks.get(k, None)
 150             if fn:
 151                 data = fn(data)
 152                 continue
 153 
 154             if isinstance(data, (dict, list, tuple)):
 155                 data = None
 156                 continue
 157 
 158             kind = typeof(data)
 159             msg = f'value of type {kind} has no properties to zoom into'
 160             raise Exception(msg)
 161         except Exception as e:
 162             key_path = ' > '.join(islice(keys, None, i + 1))
 163             raise Exception(f'{key_path}: {e}')
 164 
 165     return data
 166 
 167 
 168 def match_key(src: Dict, key: str) -> str:
 169     if key in src:
 170         return key
 171 
 172     low = key.casefold()
 173     for k in src.keys():
 174         if low == k.casefold():
 175             return k
 176 
 177     try:
 178         i = int(key)
 179         l = len(src)
 180         if i < 0:
 181             i += l
 182         if i < 0 or i >= l:
 183             return None
 184         for j, k in enumerate(src.keys()):
 185             if i == j:
 186                 return k
 187     except Exception:
 188         return key
 189     return key
 190 
 191 
 192 def match_keys(src: Any, key: str) -> Iterable:
 193     if isinstance(src, (list, tuple)):
 194         yield from match_indices(src, key)
 195         yield from match_fallbacks(src, key)
 196         return
 197 
 198     if isinstance(src, dict):
 199         if key in src:
 200             yield key
 201             return
 202 
 203         low = key.casefold()
 204         for k in src.keys():
 205             if low == k.casefold():
 206                 yield k
 207                 return
 208 
 209         yield from match_indices(src, key)
 210         yield from match_fallbacks(src, key)
 211         return
 212 
 213     yield from match_fallbacks(src, key)
 214 
 215 
 216 def match_indices(src: Any, key: str) -> Iterable:
 217     try:
 218         i = int(key)
 219 
 220         if isinstance(src, (list, tuple)):
 221             l = len(src)
 222             yield src[i] if -l <= i < l else None
 223             return
 224 
 225         if isinstance(src, dict):
 226             l = len(src)
 227             if i < 0:
 228                 i += l
 229             if i < 0 or i >= l:
 230                 return
 231 
 232             for j, k in enumerate(src.keys()):
 233                 if i == j:
 234                     yield k
 235                     return
 236 
 237         return
 238     except Exception:
 239         pass
 240 
 241     m = slice_re.match(key)
 242     if not m:
 243         return
 244 
 245     l = len(src)
 246 
 247     (start, _, kind, stop, _) = m.groups()
 248     start = int(start) if start != '' else 0
 249     stop = int(stop) if stop != '' else l
 250 
 251     if start < 0:
 252         start += l
 253     start = max(start, 0)
 254     if stop < 0:
 255         stop += l
 256     stop = min(stop, l)
 257     if kind == '..':
 258         stop += 1
 259     stop = min(stop, l)
 260 
 261     if start > stop:
 262         return
 263     if (start < 0 and stop < 0) or (start >= l and stop >= l):
 264         return
 265 
 266 
 267     if isinstance(src, dict):
 268         for i, k in enumerate(src.keys()):
 269             if i >= stop:
 270                 return
 271             if start <= i:
 272                 yield k
 273         return
 274 
 275     if isinstance(src, (list, tuple)):
 276         yield from range(start, stop)
 277         return
 278 
 279 
 280 
 281 def match_fallbacks(src: Any, key: str) -> Iterable:
 282     fn = fallbacks.get(key, None)
 283     if fn:
 284         yield fn(src)
 285 
 286 
 287 def show_help(*_) -> NoReturn:
 288     print(info_msg.strip(), file=stderr)
 289     exit(1)
 290 
 291 
 292 def keys(src: Any) -> Any:
 293     if isinstance(src, dict):
 294         return tuple(src.keys())
 295     if isinstance(src, (list, tuple)):
 296         return tuple(range(len(src)))
 297     return None
 298 
 299 
 300 def info(x: Any) -> str:
 301     if isinstance(x, dict):
 302         return f'object ({len(x)} items)'
 303     if isinstance(x, (list, tuple)):
 304         return f'array ({len(x)} items)'
 305     return typeof(x)
 306 
 307 
 308 def tally(x: Any) -> Any:
 309     if not isinstance(x, (list, tuple)):
 310         return None
 311     tally = {}
 312     for v in x:
 313         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 314         if s in tally:
 315             tally[s] += 1
 316         else:
 317             tally[s] = 1
 318     keys = sorted(tally.keys(), key=lambda k: tally[k], reverse=True)
 319     return {k: tally[k] for k in keys}
 320 
 321 
 322 def typeof(x: Any) -> str:
 323     return {
 324         type(None): 'null',
 325         bool: 'boolean',
 326         dict: 'object',
 327         float: 'number',
 328         int: 'number',
 329         str: 'string',
 330         list: 'array',
 331         tuple: 'array',
 332     }.get(type(x), 'other')
 333 
 334 
 335 def unique(x: Any) -> Any:
 336     if not isinstance(x, (list, tuple)):
 337         return x
 338     got = set()
 339     unique = []
 340     for v in x:
 341         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 342         if s in got:
 343             continue
 344         unique.append(v)
 345         got.add(s)
 346     return unique
 347 
 348 
 349 fallbacks: Dict[str, Callable] = {
 350     '.h': show_help,
 351     '.help': show_help,
 352     ':h': show_help,
 353     ':help': show_help,
 354     ':help:': show_help,
 355     '.i': info,
 356     '.info': info,
 357     ':i': info,
 358     ':info': info,
 359     ':info:': info,
 360     '.k': keys,
 361     '.keys': keys,
 362     ':keys': keys,
 363     ':keys:': keys,
 364     '.kind': typeof,
 365     ':kind': typeof,
 366     ':kind:': typeof,
 367     '.l': len,
 368     '.len': len,
 369     '.length': len,
 370     ':l': len,
 371     ':len': len,
 372     ':length': len,
 373     ':len:': len,
 374     ':length:': len,
 375     '.tally': tally,
 376     ':tally': tally,
 377     ':tally:': tally,
 378     '.t': typeof,
 379     '.type': typeof,
 380     ':t': typeof,
 381     ':type': typeof,
 382     ':type:': typeof,
 383     '.u': unique,
 384     '.unique': unique,
 385     ':u': unique,
 386     ':unique': unique,
 387     ':u:': unique,
 388     ':unique:': unique,
 389 }
 390 
 391 
 392 def pick(src: Any, keys: Tuple[str, ...]) -> Any:
 393     if isinstance(src, dict):
 394         picked = {}
 395         for k in keys:
 396             for k in match_keys(src, k):
 397                 picked[k] = src[k]
 398         return picked
 399 
 400     if isinstance(src, (list, tuple)):
 401         picked = []
 402         for k in keys:
 403             for i in match_indices(src, k):
 404                 picked.append(src[i])
 405         return tuple(picked)
 406 
 407     msg = f'can\'t pick properties from value of type {typeof(src)}'
 408     raise Exception(msg)
 409 
 410 
 411 def drop(src: Any, keys: Tuple[str, ...]) -> Any:
 412     if isinstance(src, dict):
 413         avoid = set()
 414         for k in keys:
 415             for k in match_keys(src, k):
 416                 avoid.add(k)
 417         return {k: v for k, v in src.items() if not k in avoid}
 418 
 419     if isinstance(src, (list, tuple)):
 420         l = len(src)
 421         avoid = set()
 422         for k in keys:
 423             for i in match_indices(src, k):
 424                 avoid.add(i if i >= 0 else i + l)
 425         return tuple(v for i, v in enumerate(src) if not i in avoid)
 426 
 427     msg = f'can\'t drop properties from value of type {typeof(src)}'
 428     raise Exception(msg)
 429 
 430 
 431 final_fallbacks: Dict[str, Callable] = {
 432     '+': pick,
 433     ':+:': pick,
 434     ':+': pick,
 435     '+:': pick,
 436     '/+': pick,
 437     '+/': pick,
 438 
 439     '-': drop,
 440     ':-:': drop,
 441     ':-': drop,
 442     '-:': drop,
 443     '/-': drop,
 444     '-/': drop,
 445 }
 446 
 447 
# extra imports for the `python-lambda` option

# special keys which announce that the argument right after them is a python
# expression to evaluate: func zoom keeps its own copy of these names
pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')

# only import all these when any of the command-line arguments asks for an
# expression to be evaluated; everything imported becomes a module-level
# name, the math funcs/constants being available without any prefix
if any(s in pyf for s in argv):
    import decimal
    import fractions
    import json
    import functools
    import itertools
    import math
    # capitalized alias for the math module
    Math = math
    from math import \
        acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
        copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
        fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
        isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
        log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
        radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
    # failing to import these 2 funcs is tolerated, presumably because not
    # all versions of the math module have them
    try:
        from math import cbrt, exp2
    except Exception:
        pass
    # alias for the `pow` func just imported from the math module
    power = pow
    import operator
    import statistics
    import string
    import textwrap
    import time
    import urllib.parse
 478 
 479 
 480 apo = '\''
 481 apos = '\''
 482 backquote = '`'
 483 backtick = '`'
 484 ball = ''
 485 block = ''
 486 btick = '`'
 487 bullet = ''
 488 cdot = '·'
 489 circle = ''
 490 cross = '×'
 491 dquo = '"'
 492 dquote = '"'
 493 emdash = ''
 494 endash = ''
 495 ge = ''
 496 geq = ''
 497 hellip = ''
 498 hole = ''
 499 lcurly = '{'
 500 ldquo = ''
 501 ldquote = ''
 502 le = ''
 503 leq = ''
 504 mdash = ''
 505 mdot = '·'
 506 miniball = ''
 507 ndash = ''
 508 neq = ''
 509 rcurly = '}'
 510 rdquo = ''
 511 rdquote = ''
 512 sball = ''
 513 square = ''
 514 squo = '\''
 515 squote = '\''
 516 
 517 
 518 def dive(into: Any, doing: Callable) -> Any:
 519     'Transform a nested value by calling a func via depth-first recursion.'
 520 
 521     # support args in either order
 522     if callable(into):
 523         into, doing = doing, into
 524 
 525     return _dive_kv(None, into, doing)
 526 
 527 deepmap = dive
 528 dive1 = dive
 529 
 530 
 531 def divebin(x: Any, y: Any, doing: Callable) -> Any:
 532     'Nested 2-value version of depth-first-recursive func dive.'
 533 
 534     # support args in either order
 535     if callable(x):
 536         x, y, doing = y, doing, x
 537 
 538     narg = required_arg_count(doing)
 539     if narg == 2:
 540         return dive(x, lambda a: dive(y, lambda b: doing(a, b)))
 541     if narg == 4:
 542         return dive(x, lambda i, a: dive(y, lambda j, b: doing(i, a, j, b)))
 543     raise Exception('divebin(...) only supports funcs with 2 or 4 args')
 544 
 545 bindive = divebin
 546 # diveboth = divebin
 547 # dualdive = divebin
 548 # duodive = divebin
 549 dive2 = divebin
 550 
 551 
 552 def _dive_kv(key: Any, into: Any, doing: Callable) -> Any:
 553     if isinstance(into, dict):
 554         return {k: _dive_kv(k, v, doing) for k, v in into.items()}
 555     if isinstance(into, Iterable) and not isinstance(into, str):
 556         return [_dive_kv(i, e, doing) for i, e in enumerate(into)]
 557 
 558     narg = required_arg_count(doing)
 559     return doing(key, into) if narg == 2 else doing(into)
 560 
 561 
 562 def recover(attempt: Callable, fallback: Any = None) -> Any:
 563     try:
 564         return attempt()
 565     except Exception as e:
 566         if callable(fallback):
 567             return fallback(e)
 568         return fallback
 569 
 570 attempt = recover
 571 attempted = recover
 572 recovered = recover
 573 recoverred = recover
 574 rescue = recover
 575 rescued = recover
 576 trycall = recover
 577 
 578 
 579 def required_arg_count(f: Callable) -> int:
 580     if isinstance(f, type):
 581         return 1
 582 
 583     meta = getfullargspec(f)
 584     n = len(meta.args)
 585     if meta.defaults:
 586         n -= len(meta.defaults)
 587     return n
 588 
 589 
 590 # deny file-access to expression-evaluators
 591 open = None
 592 
 593 try:
 594     # load data, trying to handle help-like options as well
 595     try:
 596         data = load(stdin.buffer)
 597     except Exception as e:
 598         if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
 599             show_help(None)
 600         else:
 601             raise e
 602 
 603     data = zoom(data, tuple(argv[1:]))
 604     dump(data, stdout, indent=2, allow_nan=False, check_circular=False)
 605     # dump(data, stdout, indent=None, allow_nan=False, check_circular=False)
 606     stdout.write('\n')
 607 except BrokenPipeError:
 608     # quit quietly, instead of showing a confusing error message
 609     stderr.close()
 610 except KeyboardInterrupt:
 611     exit(2)
 612 except Exception as e:
 613     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 614     exit(1)