File: zj.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from inspect import getfullargspec
  27 from itertools import islice
  28 from json import load, dump, dumps
  29 from sys import argv, stderr, stdin, stdout
  30 from typing import Any, Callable, Dict, Iterable, NoReturn, Tuple
  31 
  32 
  33 from re import compile as compile_re
  34 
  35 
# help message: show_help strips its leading/trailing whitespace, and emits
# it on stderr
info_msg = '''
zj [keys/indices...]


Zoom Json digs into a subset of valid JSON input, using the given mix of
keys and array-indices, the latter being either 0-based or negative, to
index backward from the ends of arrays.

Zooming on object keys is first tried as an exact key-match, failing that
as a case-insensitive key-match (first such match): when both approaches
fail, if the key is a valid integer, the key at the (even negative) index
given is used.

Invalid array-indices and missing object-keys result in null values, when
none of the special keys/fallbacks shown later apply.

You can slice arrays the exclusive/go/python way using index-pairs with a
`:` between the start/end pair, as long as it's a single argument; you can
even use `..` as the index-pair separator to include the stop index in the
result. Either way, as with go/python, you can omit either of the indices
when slicing.

Special key `.` acts as implicit loops on arrays, and even objects without
that specific key: in the unlikely case that an object has `.` as one of
its keys, you can use one of loop-fallback aliases, shown later.

Another special key is `+` (no quotes): when used, the rest of the keys
are used `in parallel`, allowing multiple picks from the current value.

Similar to `+`, the `-` fallback-key drops keys, which means all items are
picked, except for those mentioned after the `-`.

In case any of the special keys are actual keys in the data loaded, some
aliases are available:

    .   /.  ./  :.  .:
    +   /+  +/  :+  +:
    -   /-  -/  :-  -:

    .i   :i   .info    :info     :info:
    .k   :k   .keys    :keys     :keys:
    .l   :l   .len     .length   :len      :len:    :length    :length:
              .tally   :tally    :tally:
    .t   :t   .type    :type     :type:
    .u   :u   .unique  :unique   :unique:

These aliases allow using the special functionality even on objects whose
keys match some of these special names, as it's extremely unlikely data use
all aliases as actual keys at any level.

The only input supported is valid JSON coming from standard-input: there's
no way to load files using their names. To load data from files/URIs use
tools like `cat` or `curl`, and pipe their output into this tool.
'''
  90 
  91 slice_re = compile_re('''^(([+-]?[0-9]+)?)(:|\.\.)(([+-]?[0-9]+)?)$''')
  92 
  93 
  94 def zoom(data: Any, keys: Tuple[str, ...]) -> Any:
  95     eval_due = False
  96     pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')
  97 
  98     for i, k in enumerate(keys):
  99         try:
 100             if eval_due:
 101                 data = eval(k)(data)
 102                 eval_due = False
 103                 continue
 104 
 105             if isinstance(data, dict):
 106                 m = match_key(data, k)
 107                 if m in data:
 108                     data = data[m]
 109                     continue
 110                 m = slice_re.match(k)
 111                 if m:
 112                     data = {k: data[k] for k in match_keys(data, k)}
 113                     continue
 114 
 115             if isinstance(data, (list, tuple)):
 116                 try:
 117                     i = int(k)
 118                     l = len(data)
 119                     data = data[i] if -l <= i < l else None
 120                     continue
 121                 except Exception:
 122                     m = slice_re.match(k)
 123                     if m:
 124                         data = [data[i] for i in match_indices(data, k)]
 125                         continue
 126 
 127             if k in pyf:
 128                 eval_due = True
 129                 continue
 130 
 131             if k in ('.', '/.', './', ':.', '.:'):
 132                 if isinstance(data, dict):
 133                     rest = tuple(keys[i + 1:])
 134                     return {k: zoom(v, rest) for k, v in data.items()}
 135                 if isinstance(data, (list, tuple)):
 136                     rest = tuple(keys[i + 1:])
 137                     return tuple(zoom(v, rest) for v in data)
 138 
 139                 # doing nothing amounts to an identity-op for simple values
 140                 continue
 141 
 142             fn = final_fallbacks.get(k, None)
 143             if fn:
 144                 return fn(data, tuple(keys[i + 1:]))
 145 
 146             fn = fallbacks.get(k, None)
 147             if fn:
 148                 data = fn(data)
 149                 continue
 150 
 151             if isinstance(data, (dict, list, tuple)):
 152                 data = None
 153                 continue
 154 
 155             kind = typeof(data)
 156             msg = f'value of type {kind} has no properties to zoom into'
 157             raise Exception(msg)
 158         except Exception as e:
 159             key_path = ' > '.join(islice(keys, None, i + 1))
 160             raise Exception(f'{key_path}: {e}')
 161 
 162     return data
 163 
 164 
 165 def match_key(src: Dict, key: str) -> str:
 166     if key in src:
 167         return key
 168 
 169     low = key.casefold()
 170     for k in src.keys():
 171         if low == k.casefold():
 172             return k
 173 
 174     try:
 175         i = int(key)
 176         l = len(src)
 177         if i < 0:
 178             i += l
 179         if i < 0 or i >= l:
 180             return None
 181         for j, k in enumerate(src.keys()):
 182             if i == j:
 183                 return k
 184     except Exception:
 185         return key
 186     return key
 187 
 188 
 189 def match_keys(src: Any, key: str) -> Iterable:
 190     if isinstance(src, (list, tuple)):
 191         yield from match_indices(src, key)
 192         yield from match_fallbacks(src, key)
 193         return
 194 
 195     if isinstance(src, dict):
 196         if key in src:
 197             yield key
 198             return
 199 
 200         low = key.casefold()
 201         for k in src.keys():
 202             if low == k.casefold():
 203                 yield k
 204                 return
 205 
 206         yield from match_indices(src, key)
 207         yield from match_fallbacks(src, key)
 208         return
 209 
 210     yield from match_fallbacks(src, key)
 211 
 212 
 213 def match_indices(src: Any, key: str) -> Iterable:
 214     try:
 215         i = int(key)
 216 
 217         if isinstance(src, (list, tuple)):
 218             l = len(src)
 219             yield src[i] if -l <= i < l else None
 220             return
 221 
 222         if isinstance(src, dict):
 223             l = len(src)
 224             if i < 0:
 225                 i += l
 226             if i < 0 or i >= l:
 227                 return
 228 
 229             for j, k in enumerate(src.keys()):
 230                 if i == j:
 231                     yield k
 232                     return
 233 
 234         return
 235     except Exception:
 236         pass
 237 
 238     m = slice_re.match(key)
 239     if not m:
 240         return
 241 
 242     l = len(src)
 243 
 244     (start, _, kind, stop, _) = m.groups()
 245     start = int(start) if start != '' else 0
 246     stop = int(stop) if stop != '' else l
 247 
 248     if start < 0:
 249         start += l
 250     start = max(start, 0)
 251     if stop < 0:
 252         stop += l
 253     stop = min(stop, l)
 254     if kind == '..':
 255         stop += 1
 256     stop = min(stop, l)
 257 
 258     if start > stop:
 259         return
 260     if (start < 0 and stop < 0) or (start >= l and stop >= l):
 261         return
 262 
 263 
 264     if isinstance(src, dict):
 265         for i, k in enumerate(src.keys()):
 266             if i >= stop:
 267                 return
 268             if start <= i:
 269                 yield k
 270         return
 271 
 272     if isinstance(src, (list, tuple)):
 273         yield from range(start, stop)
 274         return
 275 
 276 
 277 
 278 def match_fallbacks(src: Any, key: str) -> Iterable:
 279     fn = fallbacks.get(key, None)
 280     if fn:
 281         yield fn(src)
 282 
 283 
 284 def show_help(*_) -> NoReturn:
 285     print(info_msg.strip(), file=stderr)
 286     exit(1)
 287 
 288 
 289 def keys(src: Any) -> Any:
 290     if isinstance(src, dict):
 291         return tuple(src.keys())
 292     if isinstance(src, (list, tuple)):
 293         return tuple(range(len(src)))
 294     return None
 295 
 296 
 297 def info(x: Any) -> str:
 298     if isinstance(x, dict):
 299         return f'object ({len(x)} items)'
 300     if isinstance(x, (list, tuple)):
 301         return f'array ({len(x)} items)'
 302     return typeof(x)
 303 
 304 
 305 def tally(x: Any) -> Any:
 306     if not isinstance(x, (list, tuple)):
 307         return None
 308     tally = {}
 309     for v in x:
 310         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 311         if s in tally:
 312             tally[s] += 1
 313         else:
 314             tally[s] = 1
 315     keys = sorted(tally.keys(), key=lambda k: tally[k], reverse=True)
 316     return {k: tally[k] for k in keys}
 317 
 318 
 319 def typeof(x: Any) -> str:
 320     return {
 321         type(None): 'null',
 322         bool: 'boolean',
 323         dict: 'object',
 324         float: 'number',
 325         int: 'number',
 326         str: 'string',
 327         list: 'array',
 328         tuple: 'array',
 329     }.get(type(x), 'other')
 330 
 331 
 332 def unique(x: Any) -> Any:
 333     if not isinstance(x, (list, tuple)):
 334         return x
 335     got = set()
 336     unique = []
 337     for v in x:
 338         s = dumps(v, indent=None, allow_nan=False, check_circular=False)
 339         if s in got:
 340             continue
 341         unique.append(v)
 342         got.add(s)
 343     return unique
 344 
 345 
 346 fallbacks: Dict[str, Callable] = {
 347     '.h': show_help,
 348     '.help': show_help,
 349     ':h': show_help,
 350     ':help': show_help,
 351     ':help:': show_help,
 352     '.i': info,
 353     '.info': info,
 354     ':i': info,
 355     ':info': info,
 356     ':info:': info,
 357     '.k': keys,
 358     '.keys': keys,
 359     ':keys': keys,
 360     ':keys:': keys,
 361     '.kind': typeof,
 362     ':kind': typeof,
 363     ':kind:': typeof,
 364     '.l': len,
 365     '.len': len,
 366     '.length': len,
 367     ':l': len,
 368     ':len': len,
 369     ':length': len,
 370     ':len:': len,
 371     ':length:': len,
 372     '.tally': tally,
 373     ':tally': tally,
 374     ':tally:': tally,
 375     '.t': typeof,
 376     '.type': typeof,
 377     ':t': typeof,
 378     ':type': typeof,
 379     ':type:': typeof,
 380     '.u': unique,
 381     '.unique': unique,
 382     ':u': unique,
 383     ':unique': unique,
 384     ':u:': unique,
 385     ':unique:': unique,
 386 }
 387 
 388 
 389 def pick(src: Any, keys: Tuple[str, ...]) -> Any:
 390     if isinstance(src, dict):
 391         picked = {}
 392         for k in keys:
 393             for k in match_keys(src, k):
 394                 picked[k] = src[k]
 395         return picked
 396 
 397     # if isinstance(src, (list, tuple)):
 398     #     picked = []
 399     #     for k in keys:
 400     #         for i in match_indices(src, k):
 401     #             picked.append(src[i])
 402     #     return tuple(picked)
 403 
 404     if isinstance(src, (list, tuple)):
 405         return tuple(pick(e, keys) for e in src if isinstance(e, dict))
 406 
 407     msg = f'can\'t pick properties from value of type {typeof(src)}'
 408     raise Exception(msg)
 409 
 410 
 411 def drop(src: Any, keys: Tuple[str, ...]) -> Any:
 412     if isinstance(src, dict):
 413         avoid = set()
 414         for k in keys:
 415             for k in match_keys(src, k):
 416                 avoid.add(k)
 417         return {k: v for k, v in src.items() if not k in avoid}
 418 
 419     # if isinstance(src, (list, tuple)):
 420     #     l = len(src)
 421     #     avoid = set()
 422     #     for k in keys:
 423     #         for i in match_indices(src, k):
 424     #             avoid.add(i if i >= 0 else i + l)
 425     #     return tuple(v for i, v in enumerate(src) if not i in avoid)
 426 
 427     if isinstance(src, (list, tuple)):
 428         return tuple(drop(e, keys) for e in src if isinstance(e, dict))
 429 
 430     msg = f'can\'t drop properties from value of type {typeof(src)}'
 431     raise Exception(msg)
 432 
 433 
 434 final_fallbacks: Dict[str, Callable] = {
 435     '+': pick,
 436     ':+:': pick,
 437     ':+': pick,
 438     '+:': pick,
 439     '/+': pick,
 440     '+/': pick,
 441 
 442     '-': drop,
 443     ':-:': drop,
 444     ':-': drop,
 445     '-:': drop,
 446     '/-': drop,
 447     '-/': drop,
 448 }
 449 
 450 
# extra imports for the `python-lambda` option

# keys which make func zoom evaluate the key right after them as a python
# expression: func zoom keeps its own copy of this same tuple
pyf = ('.pyl', ':pyl', 'pyl:', ':pyl:', '.pyf', ':pyf', 'pyf:', ':pyf:')

# the imports below only happen when any of the command-line args is one of
# the keys above; they become global names at the module level, presumably
# so that evaluated expressions can use them
if any(s in pyf for s in argv):
    import decimal
    import fractions
    import json
    import functools
    import itertools
    import math
    # capitalized alias for the math module
    Math = math
    from math import \
        acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
        copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
        fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
        isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
        log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
        radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
    # these 2 imports are allowed to fail; NOTE(review): presumably because
    # older python versions lack them - confirm
    try:
        from math import cbrt, exp2
    except Exception:
        pass
    # at this point, name `pow` is math.pow, not the built-in func
    power = pow
    import operator
    import statistics
    import string
    import textwrap
    import time
    import urllib.parse
 481 
 482 
 483 apo = '\''
 484 apos = '\''
 485 backquote = '`'
 486 backtick = '`'
 487 ball = ''
 488 block = ''
 489 btick = '`'
 490 bullet = ''
 491 cdot = '·'
 492 circle = ''
 493 cross = '×'
 494 dquo = '"'
 495 dquote = '"'
 496 emdash = ''
 497 endash = ''
 498 ge = ''
 499 geq = ''
 500 hellip = ''
 501 hole = ''
 502 lcurly = '{'
 503 ldquo = ''
 504 ldquote = ''
 505 le = ''
 506 leq = ''
 507 mdash = ''
 508 mdot = '·'
 509 miniball = ''
 510 ndash = ''
 511 neq = ''
 512 rcurly = '}'
 513 rdquo = ''
 514 rdquote = ''
 515 sball = ''
 516 square = ''
 517 squo = '\''
 518 squote = '\''
 519 
 520 
 521 def dive(into: Any, doing: Callable) -> Any:
 522     'Transform a nested value by calling a func via depth-first recursion.'
 523 
 524     # support args in either order
 525     if callable(into):
 526         into, doing = doing, into
 527 
 528     return _dive_kv(None, into, doing)
 529 
 530 deepmap = dive
 531 dive1 = dive
 532 
 533 
 534 def divebin(x: Any, y: Any, doing: Callable) -> Any:
 535     'Nested 2-value version of depth-first-recursive func dive.'
 536 
 537     # support args in either order
 538     if callable(x):
 539         x, y, doing = y, doing, x
 540 
 541     narg = required_arg_count(doing)
 542     if narg == 2:
 543         return dive(x, lambda a: dive(y, lambda b: doing(a, b)))
 544     if narg == 4:
 545         return dive(x, lambda i, a: dive(y, lambda j, b: doing(i, a, j, b)))
 546     raise Exception('divebin(...) only supports funcs with 2 or 4 args')
 547 
 548 bindive = divebin
 549 dive2 = divebin
 550 
 551 
 552 def _dive_kv(key: Any, into: Any, doing: Callable) -> Any:
 553     if isinstance(into, dict):
 554         return {k: _dive_kv(k, v, doing) for k, v in into.items()}
 555     if isinstance(into, Iterable) and not isinstance(into, str):
 556         return [_dive_kv(i, e, doing) for i, e in enumerate(into)]
 557 
 558     narg = required_arg_count(doing)
 559     return doing(key, into) if narg == 2 else doing(into)
 560 
 561 
 562 def rescue(attempt: Callable, fallback: Any = None) -> Any:
 563     try:
 564         return attempt()
 565     except Exception as e:
 566         if callable(fallback):
 567             return fallback(e)
 568         return fallback
 569 
 570 catch = rescue
 571 recover = rescue
 572 rescued = rescue
 573 
 574 
 575 def required_arg_count(f: Callable) -> int:
 576     if isinstance(f, type):
 577         return 1
 578 
 579     meta = getfullargspec(f)
 580     n = len(meta.args)
 581     if meta.defaults:
 582         n -= len(meta.defaults)
 583     return n
 584 
 585 
# deny file-access to expression-evaluators
# NOTE(review): this only hides name `open` at the module level; confirm
# whether other ways to reach files need denying as well
open = None

# main script: load JSON from stdin, zoom into it using the command-line
# args as keys, then show the result as indented JSON on stdout
try:
    # load data, trying to handle help-like options as well
    try:
        data = load(stdin.buffer)
    except Exception as e:
        # help options are only checked for when the input isn't valid JSON
        if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
            show_help(None)
        else:
            raise e

    data = zoom(data, tuple(argv[1:]))
    dump(data, stdout, indent=2, separators=(',', ': '),
        allow_nan=False, check_circular=False)
    stdout.write('\n')
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
    exit(0)
except KeyboardInterrupt:
    exit(2)
except Exception as e:
    # show the error message in red, using ANSI-style codes
    print(f'\x1b[31m{e}\x1b[0m', file=stderr)
    exit(1)