File: tb.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 info = '''
  27 tb [options...] [python expression] [filepath/URI...]
  28 
  29 
  30 Transform Bytes loads binary data, runs a Python expression on it, and emits
  31 the result as binary data. Input-data are available to the expression as any
  32 of the variables named `v`, `value`, `d`, and `data`.
  33 
  34 If no file/URI is given, it loads data from its standard input. If the arg
  35 before the expression is a single equals sign (a `=`, without the quotes),
  36 no data are read, and the expression is evaluated as given.
  37 
  38 Options, where leading double-dashes are also allowed, except for alias `=`:
  39 
  40     -h          show this help message
  41     -help       same as -h
  42 
  43     -nil         don't read any input
  44     -no-input    same as -nil
  45     -noinput     same as -nil
  46     -none        same as -nil
  47     -null        same as -nil
  48     -null-input  same as -nil
  49     -nullinput   same as -nil
  50     =            same as -nil
  51 
  52     -t          show a full traceback of this script for exceptions
  53     -trace      same as -t
  54     -traceback  same as -t
  55 
  56 
  57 Extra Functions
  58 
  59 after(x, y)       ignore bytes until the one given
  60 afterfinal(x, y)  backward counterpart of func after
  61 afterlast(x, y)   same as func afterfinal
  62 arrayish(x)       check if value is a list, a tuple, or a generator
  63 before(x, y)      ignore bytes since the one given
  64 beforefinal(x, y) backward counterpart of func before
  65 beforelast(x, y)  same as func beforefinal
  66 ints(x, y, ?f)    make sequences of increasing integers, which include the end
  67 iota(x, ?f)       make an integer sequence from 1 up to the number given
  68 join(x, y)        join values into a string; make a dict from keys and values
  69 recover(*args)    recover from exceptions with a fallback value
  70 since(x, y)       ignore bytes before the one given
  71 sincefinal(x, y)  backward counterpart of func since
  72 sincelast(x, y)   same as func sincefinal
  73 tobytes(x)        turn value into a `bytes` value
  74 until(x, y)       ignore bytes after the one given
  75 untilfinal(x, y)  backward counterpart of func until
  76 untillast(x, y)   same as func untilfinal
  77 
  78 
  79 Examples
  80 
  81 # numbers from 0 to 5; no input is read/used
  82 tb = 'range(6)'
  83 
  84 # emit all ASCII bytes
  85 tb = 'range(128)'
  86 
  87 # more examples needed...
  88 '''
  89 
  90 
  91 from sys import argv, exit, stderr, stdin, stdout
  92 
  93 
  94 if __name__ != '__main__':
  95     print('don\'t import this script, run it directly instead', file=stderr)
  96     exit(1)
  97 
  98 # no args or a leading help-option arg means show the help message and quit
  99 help_opts = ('-h', '--h', '-help', '--help')
 100 if len(argv) < 2 or (len(argv) == 2 and argv[1] in help_opts):
 101     print(info.strip(), file=stderr)
 102     exit(0)
 103 
 104 
 105 from base64 import \
 106     standard_b64encode, standard_b64decode, \
 107     standard_b64encode as base64bytes, standard_b64decode as debase64bytes
 108 
 109 from collections import \
 110     ChainMap, Counter, defaultdict, deque, namedtuple, OrderedDict, \
 111     UserDict, UserList, UserString
 112 
 113 from copy import copy, deepcopy
 114 
 115 from datetime import \
 116     MAXYEAR, MINYEAR, date, datetime, time, timedelta, timezone, tzinfo
 117 try:
 118     from datetime import now, UTC
 119 except Exception:
 120     now = lambda: datetime(2000, 1, 1).now()
 121 
 122 from decimal import Decimal, getcontext
 123 
 124 from difflib import \
 125     context_diff, diff_bytes, Differ, get_close_matches, HtmlDiff, \
 126     IS_CHARACTER_JUNK, IS_LINE_JUNK, ndiff, restore, SequenceMatcher, \
 127     unified_diff
 128 
 129 from fractions import Fraction
 130 
 131 import functools
 132 from functools import \
 133     cache, cached_property, cmp_to_key, get_cache_token, lru_cache, \
 134     namedtuple, partial, partialmethod, recursive_repr, reduce, \
 135     singledispatch, singledispatchmethod, total_ordering, update_wrapper, \
 136     wraps
 137 
 138 from glob import glob, iglob
 139 
 140 try:
 141     from graphlib import CycleError, TopologicalSorter
 142 except Exception:
 143     pass
 144 
 145 from hashlib import \
 146     file_digest, md5, pbkdf2_hmac, scrypt, sha1, sha224, sha256, sha384, \
 147     sha512
 148 
 149 from inspect import getfullargspec, getsource
 150 
 151 import itertools
 152 from itertools import \
 153     accumulate, chain, combinations, combinations_with_replacement, \
 154     compress, count, cycle, dropwhile, filterfalse, groupby, islice, \
 155     permutations, product, repeat, starmap, takewhile, tee, zip_longest
 156 try:
 157     from itertools import pairwise
 158     from itertools import batched
 159 except Exception:
 160     pass
 161 
 162 from json import dump, dumps, loads
 163 
 164 import math
 165 Math = math
 166 from math import \
 167     acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
 168     copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
 169     fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
 170     isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
 171     log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
 172     radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
 173 try:
 174     from math import cbrt, exp2
 175 except Exception:
 176     pass
 177 
 178 power = pow
 179 
 180 import operator
 181 
 182 from pathlib import Path
 183 
 184 from pprint import \
 185     isreadable, isrecursive, pformat, pp, pprint, PrettyPrinter, saferepr
 186 
 187 from random import \
 188     betavariate, choice, choices, expovariate, gammavariate, gauss, \
 189     getrandbits, getstate, lognormvariate, normalvariate, paretovariate, \
 190     randbytes, randint, random, randrange, sample, seed, setstate, \
 191     shuffle, triangular, uniform, vonmisesvariate, weibullvariate
 192 
 193 compile_py = compile # keep built-in func compile for later
 194 from re import compile as compile_uncached, Pattern, IGNORECASE
 195 
 196 import statistics
 197 from statistics import \
 198     bisect_left, bisect_right, fmean, \
 199     geometric_mean, harmonic_mean, mean, median, \
 200     median_grouped, median_high, median_low, mode, multimode, pstdev, \
 201     pvariance, quantiles, stdev, variance
 202 try:
 203     from statistics import \
 204         correlation, covariance, linear_regression, mul
 205 except Exception:
 206     pass
 207 
 208 import string
 209 from string import \
 210     Formatter, Template, ascii_letters, ascii_lowercase, ascii_uppercase, \
 211     capwords, digits, hexdigits, octdigits, printable, punctuation, \
 212     whitespace
 213 
 214 alphabet = ascii_letters
 215 letters = ascii_letters
 216 lowercase = ascii_lowercase
 217 uppercase = ascii_uppercase
 218 
 219 from textwrap import dedent, fill, indent, shorten, wrap
 220 
 221 from time import \
 222     altzone, asctime, \
 223     ctime, daylight, get_clock_info, \
 224     gmtime, localtime, mktime, monotonic, monotonic_ns, perf_counter, \
 225     perf_counter_ns, process_time, process_time_ns, \
 226     sleep, strftime, strptime, struct_time, thread_time, thread_time_ns, \
 227     time, time_ns, timezone, tzname
 228 try:
 229     from time import \
 230         clock_getres, clock_gettime, clock_gettime_ns, clock_settime, \
 231         clock_settime_ns, pthread_getcpuclockid, tzset
 232 except Exception:
 233     pass
 234 
 235 from unicodedata import \
 236     bidirectional, category, combining, decimal, decomposition, digit, \
 237     east_asian_width, is_normalized, lookup, mirrored, name, normalize, \
 238     numeric
 239 
 240 from urllib.parse import \
 241     parse_qs, parse_qsl, quote, quote_from_bytes, quote_plus, unquote, \
 242     unquote_plus, unquote_to_bytes, unwrap, urldefrag, urlencode, urljoin, \
 243     urlparse, urlsplit, urlunparse, urlunsplit
 244 
 245 
 246 from typing import \
 247     AbstractSet, Annotated, Any, AnyStr, \
 248     AsyncContextManager, AsyncGenerator, AsyncIterable, AsyncIterator, \
 249     Awaitable, BinaryIO, ByteString, Callable, cast, \
 250     ClassVar, Collection, Container, \
 251     ContextManager, Coroutine, Deque, Dict, Final, \
 252     final, ForwardRef, FrozenSet, Generator, Generic, get_args, get_origin, \
 253     get_type_hints, Hashable, IO, ItemsView, \
 254     Iterable, Iterator, KeysView, List, Literal, Mapping, \
 255     MappingView, Match, MutableMapping, MutableSequence, MutableSet, \
 256     NamedTuple, NewType, no_type_check, no_type_check_decorator, \
 257     NoReturn, Optional, overload, \
 258     Protocol, Reversible, \
 259     runtime_checkable, Sequence, Set, Sized, SupportsAbs, \
 260     SupportsBytes, SupportsComplex, SupportsFloat, SupportsIndex, \
 261     SupportsInt, SupportsRound, Text, TextIO, Tuple, Type, \
 262     TypedDict, TypeVar, \
 263     TYPE_CHECKING, Union, ValuesView
 264 try:
 265     from typing import \
 266         assert_never, assert_type, clear_overloads, Concatenate, \
 267         dataclass_transform, get_overloads, is_typeddict, LiteralString, \
 268         Never, NotRequired, ParamSpec, ParamSpecArgs, ParamSpecKwargs, \
 269         Required, reveal_type, Self, TypeAlias, TypeGuard, TypeVarTuple, \
 270         Unpack
 271     from typing import \
 272         AwaitableGenerator, override, TypeAliasType, type_check_only
 273 except Exception:
 274     pass
 275 
 276 
 277 false = False
 278 true = True
 279 nil = None
 280 none = None
 281 null = None
 282 
 283 cr = b'\r'
 284 crlf = b'\r\n'
 285 dquo = b'"'
 286 dquote = b'"'
 287 lcurly = b'{'
 288 lf = b'\n'
 289 rcurly = b'}'
 290 squo = b'\''
 291 squote = b'\''
 292 tab = b'\t'
 293 utf8bom = b'\xef\xbb\xbf'
 294 utf16be = b'\xfe\xff'
 295 utf16le = b'\xff\xfe'
 296 
 297 # re_cache is used by custom func compile to cache previously-compiled
 298 # regular-expressions, which makes them quicker to (re)use in formulas
 299 re_cache: Dict[str, Pattern] = {}
 300 
 301 # ire_cache is like re_cache, except it's for case-insensitive regexes
 302 ire_cache: Dict[str, Pattern] = {}
 303 
 304 
 305 def arrayish(x: Any) -> bool:
 306     'Check if a value is array-like enough.'
 307     return isinstance(x, (list, tuple, range, Generator))
 308 
 309 def compile(s: str, case_sensitive: bool = True) -> Pattern:
 310     'Cached regex `compiler`, so it\'s quicker to (re)use in formulas.'
 311 
 312     cache = re_cache if case_sensitive else ire_cache
 313     options = 0 if case_sensitive else IGNORECASE
 314 
 315     if s in cache:
 316         return cache[s]
 317     e = compile_uncached(s, options)
 318     cache[s] = e
 319     return e
 320 
 321 def disabled_exec(*args, **kwargs) -> None:
 322     _ = args
 323     _ = kwargs
 324     raise Exception('built-in func `exec` is disabled')
 325 
 326 def identity(x: Any) -> Any:
 327     '''
 328     Return the value given: this is the default transformer for several
 329     higher-order funcs, which effectively keeps original items as given.
 330     '''
 331     return x
 332 
 333 idem = identity
 334 iden = identity
 335 
 336 def ints(start, stop, f: Callable = identity) -> Iterable[int]:
 337     'Sequence integers, end-value included.'
 338 
 339     if isnan(start) or isnan(stop) or isinf(start) or isinf(stop):
 340         return tuple()
 341     return (f(e) for e in range(int(ceil(start)), int(stop) + 1))
 342 
 343 integers = ints
 344 
 345 def iota(n: int, f: Callable = identity) -> Iterable[int]:
 346     'Sequence all integers from 1 up to (and including) the int given.'
 347     return (f(e) for e in range(1, n + 1))
 348 
 349 def join(items: Iterable, sep: Union[str, Iterable] = ' ') -> Union[str, Dict]:
 350     '''
 351     Join iterables using the separator-string given: its 2 arguments
 352     can come in either order, and are sorted out if needed. When given
 353     2 non-string iterables, the result is an object whose keys are from
 354     the first argument, and whose values are from the second one.
 355 
 356     You can use it any of the following ways, where `keys` and `values` are
 357     sequences (lists, tuples, or generators), and `separator` is a string:
 358 
 359         join(values)
 360         join(values, separator)
 361         join(separator, values)
 362         join(keys, values)
 363     '''
 364 
 365     if arrayish(items) and arrayish(sep):
 366         return {k: v for k, v in zip(items, sep)}
 367     if isinstance(items, str):
 368         items, sep = sep, items
 369     return sep.join(str(e) for e in items)
 370 
 371 def json0(x: Any) -> str:
 372     'Encode value into a minimal single-line JSON string.'
 373     return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)
 374 
 375 j0 = json0
 376 
 377 def json2(x: Any) -> str:
 378     '''
 379     Encode value into a (possibly multiline) JSON string, using 2 spaces for
 380     each indentation level.
 381     '''
 382     return dumps(x, separators=(',', ': '), allow_nan=False, indent=2)
 383 
 384 j2 = json2
 385 
 386 def tally(src: Iterable, by: Callable = identity) -> Dict[Any, int]:
 387     '''
 388     Count all distinct (transformed) values, the result being a dictionary
 389     whose keys are all the transformed values, and whose items are positive
 390     integers.
 391     '''
 392 
 393     if callable(src):
 394         src, by = by, src
 395 
 396     tally: Dict[Any, int] = {}
 397 
 398     if isinstance(src, dict):
 399         for k, v in src.items():
 400             dk = by(k, v)
 401             if dk in tally:
 402                 tally[dk] += 1
 403             else:
 404                 tally[dk] = 1
 405     else:
 406         for v in src:
 407             dk = by(v)
 408             if dk in tally:
 409                 tally[dk] += 1
 410             else:
 411                 tally[dk] = 1
 412     return tally
 413 
 414 tallied = tally
 415 
 416 def after(x: bytes, what: bytes) -> bytes:
 417     what = tointorbytes(what)
 418     i = x.find(what)
 419     return '' if i < 0 else x[i+len(what):]
 420 
 421 def afterlast(x: bytes, what: bytes) -> bytes:
 422     what = tointorbytes(what)
 423     i = x.rfind(what)
 424     return '' if i < 0 else x[i+len(what):]
 425 
 426 afterfinal = afterlast
 427 
 428 def before(x: bytes, what: bytes) -> bytes:
 429     what = tointorbytes(what)
 430     i = x.find(what)
 431     return x if i < 0 else x[:i]
 432 
 433 def beforelast(x: bytes, what: bytes) -> bytes:
 434     what = tointorbytes(what)
 435     i = x.rfind(what)
 436     return x if i < 0 else x[:i]
 437 
 438 beforefinal = beforelast
 439 
 440 def since(x: bytes, what: bytes) -> bytes:
 441     what = tointorbytes(what)
 442     i = x.find(what)
 443     return '' if i < 0 else x[i:]
 444 
 445 def sincelast(x: bytes, what: bytes) -> bytes:
 446     what = tointorbytes(what)
 447     i = x.rfind(what)
 448     return '' if i < 0 else x[i:]
 449 
 450 sincefinal = sincelast
 451 
 452 def until(x: bytes, what: bytes) -> bytes:
 453     what = tointorbytes(what)
 454     i = x.find(what)
 455     return x if i < 0 else x[:i+len(what)]
 456 
 457 def untillast(x: bytes, what: bytes) -> bytes:
 458     what = tointorbytes(what)
 459     i = x.rfind(what)
 460     return x if i < 0 else x[:i+len(what)]
 461 
 462 untilfinal = untillast
 463 
 464 def utf8(x: bytes) -> str:
 465     return str(x, encoding='utf-8')
 466 
 467 toutf8 = utf8
 468 utf8ify = utf8
 469 
 470 
 471 no_input_opts = (
 472     '=', '-None', '--None', '-nil', '--nil', '-noinput', '--noinput',
 473     '-no-input', '--no-input', '-none', '--none', '-null', '--null',
 474     '-null-input', '--null-input', '-nullinput', '--nullinput', '--n',
 475 )
 476 traceback_opts = (
 477     '-t', '--t', '-trace', '--trace', '-traceback', '--traceback',
 478 )
 479 
 480 args = argv[1:]
 481 load_input = True
 482 trace_exceptions = False
 483 expression = None
 484 
 485 # handle all other leading options; the explicit help options are
 486 # handled earlier in the script
 487 while len(args) > 0:
 488     if args[0] in no_input_opts:
 489         load_input = False
 490         args = args[1:]
 491     elif args[0] in traceback_opts:
 492         trace_exceptions = True
 493         args = args[1:]
 494     else:
 495         break
 496 
 497 if len(args) > 0:
 498     expression = args[0]
 499     args = args[1:]
 500 
 501 if expression is None:
 502     print(info.strip(), file=stderr)
 503     exit(0)
 504 
 505 
 506 def make_open_read(open: Callable) -> Callable:
 507     'Restrict the file-open func to a read-only-binary file-open func.'
 508     def open_readonly(name: str):
 509         return open(name, mode='rb')
 510     return open_readonly
 511 
 512 def seems_url(s: str) -> bool:
 513     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 514     return any(s.startswith(p) for p in protocols)
 515 
 516 
 517 def tobytes(x: Any) -> bytes:
 518     if isinstance(x, bytes):
 519         return x
 520     if isinstance(x, (bool, int)):
 521         return bytes(int(x))
 522     if isinstance(x, float):
 523         return bytes(str(x), encoding='utf-8')
 524     if isinstance(x, str):
 525         return bytes(x, encoding='utf-8')
 526     return bytes(x)
 527 
 528 def tointorbytes(x: Any) -> Union[bytes, int]:
 529     return x if isinstance(x, int) else tobytes(x)
 530 
 531 
 532 open_read = make_open_read(open)
 533 open = open_read
 534 
 535 exec = disabled_exec
 536 
 537 
 538 def adapt_result(x: Any, default: Any) -> Any:
 539     if x is True:
 540         return default
 541     if x is False:
 542         return None
 543 
 544     if callable(x):
 545         return x(default)
 546     return x
 547 
 548 
 549 def emit_result(w, x: Any) -> None:
 550     if x is None:
 551         return
 552 
 553     if isinstance(x, (list, tuple, range, Generator)):
 554         for e in x:
 555             w.write(tobytes(e))
 556         return
 557 
 558     w.write(tobytes(x))
 559 
 560 
def eval_expr(expr, using: Any) -> Any:
    '''
    Evaluate the expression given, after binding the value given to all the
    global variables expressions can use to refer to their input. The result
    is adapted via func adapt_result, with the same value as its default.
    '''
    global v, val, value, d, dat, data
    # offer several aliases for the variable with the input bytes
    v = val = value = d = dat = data = using
    # NOTE(review): eval runs the expression with access to all names in this
    # script, and to this func's own parameters `expr` and `using`, so only
    # trusted expressions should ever get here
    return adapt_result(eval(expr), using)
 566 
 567 
# main part of the script: compile the expression once, then run it on each
# input, emitting each result to standard output as binary data
try:
    # the expression's source also stands for its `filename` when compiling
    expression = compile_py(expression, expression, 'eval')

    if args.count('-') > 1:
        raise Exception('can\'t use stdin (-) more than once')

    if load_input:
        # each named input is loaded whole, and handled on its own: a dash
        # means standard input, URIs are fetched, and anything else is
        # opened as a file
        for name in args:
            if name == '-':
                data = stdin.buffer.read()
            elif seems_url(name):
                from urllib.request import urlopen
                with urlopen(name) as inp:
                    data = inp.read()
            else:
                with open_read(name) as inp:
                    data = inp.read()

            emit_result(stdout.buffer, eval_expr(expression, data))

        # without any named inputs, standard input is the only input
        if len(args) == 0:
            data = stdin.buffer.read()
            emit_result(stdout.buffer, eval_expr(expression, data))
    else:
        # no-input mode: the result is emitted without going through func
        # adapt_result, and none of the input variables are set beforehand
        emit_result(stdout.buffer, eval(expression))
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
except KeyboardInterrupt:
    exit(2)
except Exception as e:
    # re-raising lets python show the full traceback, when asked to
    if trace_exceptions:
        raise e
    # otherwise, show just the error message, styled in red via ANSI codes
    s = str(e)
    s = s if s else '<generic exception>'
    print(f'\x1b[31m{s}\x1b[0m', file=stderr)
    exit(1)
 604     exit(1)