#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# NOTE(review): the help below lists func `recover`, which isn't defined
# anywhere in this file; either define it, or drop it from the help message
info = '''
tb [options...] [python expression] [filepath/URI...]


Transform Bytes loads binary data, runs a Python expression on it, and emits
the result as binary data. Input-data are available to the expression as any
of the variables named `v`, `value`, `d`, and `data`.

If no file/URI is given, it loads data from its standard input. If the arg
before the expression is a single equals sign (a `=`, without the quotes), no
data are read, and the expression is evaluated as given.

Options, where leading double-dashes are also allowed, except for alias `=`:

    -h            show this help message
    -help         same as -h

    -nil          don't read any input
    -no-input     same as -nil
    -noinput      same as -nil
    -none         same as -nil
    -null         same as -nil
    -null-input   same as -nil
    -nullinput    same as -nil
    =             same as -nil

    -t            show a full traceback of this script for exceptions
    -trace        same as -t
    -traceback    same as -t


Extra Functions

    after(x, y)          ignore bytes until the one given
    afterfinal(x, y)     backward counterpart of func after
    afterlast(x, y)      same as func afterfinal

    arrayish(x)          check if value is a list, a tuple, or a generator

    before(x, y)         ignore bytes since the one given
    beforefinal(x, y)    backward counterpart of func before
    beforelast(x, y)     same as func beforefinal

    ints(x, y, ?f)       make sequences of increasing integers, which include the end
    iota(x, ?f)          make an integer sequence from 1 up to the number given
    join(x, y)           join values into a string; make a dict from keys and values
    recover(*args)       recover from exceptions with a fallback value

    since(x, y)          ignore bytes before the one given
    sincefinal(x, y)     backward counterpart of func since
    sincelast(x, y)      same as func sincefinal

    tobytes(x)           turn value into a `bytes` value

    until(x, y)          ignore bytes after the one given
    untilfinal(x, y)     backward counterpart of func until
    untillast(x, y)      same as func untilfinal


Examples

# numbers from 0 to 5; no input is read/used
tb = 'range(6)'

# emit all ASCII bytes
tb = 'range(128)'

# more examples needed...
'''


from sys import argv, exit, stderr, stdin, stdout


if __name__ != '__main__':
    print('don\'t import this script, run it directly instead', file=stderr)
    exit(1)

# no args or a leading help-option arg means show the help message and quit
help_opts = ('-h', '--h', '-help', '--help')
if len(argv) < 2 or (len(argv) == 2 and argv[1] in help_opts):
    print(info.strip(), file=stderr)
    exit(0)


# all the imports below are deliberately wide: their only purpose is to make
# plenty of names directly available to the expression being evaluated

from base64 import \
    standard_b64encode, standard_b64decode, \
    standard_b64encode as base64bytes, standard_b64decode as debase64bytes

from collections import \
    ChainMap, Counter, defaultdict, deque, namedtuple, OrderedDict, \
    UserDict, UserList, UserString

from copy import copy, deepcopy

# note: names `time` and `timezone` from module datetime are later replaced
# by their namesakes from module time, which is imported further down
from datetime import \
    MAXYEAR, MINYEAR, date, datetime, time, timedelta, timezone, tzinfo
# module datetime has no func `now`, so alias the class-method directly
now = datetime.now
try:
    from datetime import UTC
except ImportError:
    # datetime.UTC only exists since python 3.11, where it's an alias of
    # timezone.utc
    UTC = timezone.utc

from decimal import Decimal, getcontext

from difflib import \
    context_diff, diff_bytes, Differ, get_close_matches, HtmlDiff, \
    IS_CHARACTER_JUNK, IS_LINE_JUNK, ndiff, restore, SequenceMatcher, \
    unified_diff

from fractions import Fraction

import functools
from functools import \
    cache, cached_property, cmp_to_key, get_cache_token, lru_cache, \
    namedtuple, partial, partialmethod, recursive_repr, reduce, \
    singledispatch, singledispatchmethod, total_ordering, update_wrapper, \
    wraps

from glob import glob, iglob

try:
    from graphlib import CycleError, TopologicalSorter
except ImportError:
    pass

from hashlib import md5, pbkdf2_hmac, sha1, sha224, sha256, sha384, sha512
try:
    # availability of scrypt depends on how python was built
    from hashlib import scrypt
except ImportError:
    pass
try:
    # file_digest only exists since python 3.11
    from hashlib import file_digest
except ImportError:
    pass

from inspect import getfullargspec, getsource

import itertools
from itertools import \
    accumulate, chain, combinations, combinations_with_replacement, \
    compress, count, cycle, dropwhile, filterfalse, groupby, islice, \
    permutations, product, repeat, starmap, takewhile, tee, zip_longest
try:
    from itertools import pairwise
    from itertools import batched
except ImportError:
    pass

from json import dump, dumps, loads

import math
Math = math
from math import \
    acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
    copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
    fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
    isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
    log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
    radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
try:
    from math import cbrt, exp2
except ImportError:
    pass

power = pow

import operator

from pathlib import Path

from pprint import \
    isreadable, isrecursive, pformat, pp, pprint, PrettyPrinter, saferepr

from random import \
    betavariate, choice, choices, expovariate, gammavariate, gauss, \
    getrandbits, getstate, lognormvariate, normalvariate, paretovariate, \
    randbytes, randint, random, randrange, sample, seed, setstate, \
    shuffle, triangular, uniform, vonmisesvariate, weibullvariate

compile_py = compile  # keep built-in func compile for later
from re import compile as compile_uncached, Pattern, IGNORECASE

import statistics
from statistics import \
    bisect_left, bisect_right, fmean, \
    geometric_mean, harmonic_mean, mean, median, \
    median_grouped, median_high, median_low, mode, multimode, pstdev, \
    pvariance, quantiles, stdev, variance
try:
    from statistics import \
        correlation, covariance, linear_regression, mul
except ImportError:
    pass

import string
from string import \
    Formatter, Template, ascii_letters, ascii_lowercase, ascii_uppercase, \
    capwords, digits, hexdigits, octdigits, printable, punctuation, \
    whitespace

alphabet = ascii_letters
letters = ascii_letters
lowercase = ascii_lowercase
uppercase = ascii_uppercase

from textwrap import dedent, fill, indent, shorten, wrap

from time import \
    altzone, asctime, \
    ctime, daylight, get_clock_info, \
    gmtime, localtime, mktime, monotonic, monotonic_ns, perf_counter, \
    perf_counter_ns, process_time, process_time_ns, \
    sleep, strftime, strptime, struct_time, thread_time, thread_time_ns, \
    time, time_ns, timezone, tzname
try:
    from time import \
        clock_getres, clock_gettime, clock_gettime_ns, clock_settime, \
        clock_settime_ns, pthread_getcpuclockid, tzset
except ImportError:
    pass

from unicodedata import \
    bidirectional, category, combining, decimal, decomposition, digit, \
    east_asian_width, is_normalized, lookup, mirrored, name, normalize, \
    numeric

from urllib.parse import \
    parse_qs, parse_qsl, quote, quote_from_bytes, quote_plus, unquote, \
    unquote_plus, unquote_to_bytes, unwrap, urldefrag, urlencode, urljoin, \
    urlparse, urlsplit, urlunparse, urlunsplit

from typing import \
    AbstractSet, Annotated, Any, AnyStr, \
    AsyncContextManager, AsyncGenerator, AsyncIterable, AsyncIterator, \
    Awaitable, BinaryIO, ByteString, Callable, cast, \
    ClassVar, Collection, Container, \
    ContextManager, Coroutine, Deque, Dict, Final, \
    final, ForwardRef, FrozenSet, Generator, Generic, get_args, get_origin, \
    get_type_hints, Hashable, IO, ItemsView, \
    Iterable, Iterator, KeysView, List, Literal, Mapping, \
    MappingView, Match, MutableMapping, MutableSequence, MutableSet, \
    NamedTuple, NewType, no_type_check, no_type_check_decorator, \
    NoReturn, Optional, overload, \
    Protocol, Reversible, \
    runtime_checkable, Sequence, Set, Sized, SupportsAbs, \
    SupportsBytes, SupportsComplex, SupportsFloat, SupportsIndex, \
    SupportsInt, SupportsRound, Text, TextIO, Tuple, Type, \
    TypedDict, TypeVar, \
    TYPE_CHECKING, Union, ValuesView
try:
    from typing import \
        assert_never, assert_type, clear_overloads, Concatenate, \
        dataclass_transform, get_overloads, is_typeddict, LiteralString, \
        Never, NotRequired, ParamSpec, ParamSpecArgs, ParamSpecKwargs, \
        Required, reveal_type, Self, TypeAlias, TypeGuard, TypeVarTuple, \
        Unpack
except ImportError:
    pass
try:
    # these are tried separately from the names below, so that a failure
    # there can't prevent these from being imported
    from typing import override, TypeAliasType
except ImportError:
    pass
try:
    from typing import AwaitableGenerator, type_check_only
except ImportError:
    pass


# handy aliases for constants, using names common in other languages
false = False
true = True
nil = None
none = None
null = None

# handy byte-string constants
cr = b'\r'
crlf = b'\r\n'
dquo = b'"'
dquote = b'"'
lcurly = b'{'
lf = b'\n'
rcurly = b'}'
squo = b'\''
squote = b'\''
tab = b'\t'
utf8bom = b'\xef\xbb\xbf'
utf16be = b'\xfe\xff'
utf16le = b'\xff\xfe'


# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache: Dict[str, Pattern] = {}

# ire_cache is like re_cache, except it's for case-insensitive regexes
ire_cache: Dict[str, Pattern] = {}


def arrayish(x: Any) -> bool:
    'Check if a value is array-like enough.'
    return isinstance(x, (list, tuple, range, Generator))


def compile(s: str, case_sensitive: bool = True) -> Pattern:
    'Cached regex `compiler`, so it\'s quicker to (re)use in formulas.'

    # case-sensitive and case-insensitive regexes are cached separately
    compiled = re_cache if case_sensitive else ire_cache
    if s in compiled:
        return compiled[s]

    options = 0 if case_sensitive else IGNORECASE
    expr = compile_uncached(s, options)
    compiled[s] = expr
    return expr


def disabled_exec(*args, **kwargs) -> None:
    'Replacement for built-in func `exec`, which always fails when called.'
    _ = args
    _ = kwargs
    raise Exception('built-in func `exec` is disabled')


def identity(x: Any) -> Any:
    '''
    Return the value given: this is the default transformer for several
    higher-order funcs, which effectively keeps original items as given.
    '''
    return x

idem = identity
iden = identity


def ints(start, stop, f: Callable = identity) -> Iterable[int]:
    '''
    Sequence integers, end-value included: each integer is transformed by
    the func optionally given. The result is empty when either end is a NaN
    or an infinity.
    '''

    if isnan(start) or isnan(stop) or isinf(start) or isinf(stop):
        return tuple()
    return (f(n) for n in range(int(ceil(start)), int(stop) + 1))

integers = ints


def iota(n: int, f: Callable = identity) -> Iterable[int]:
    'Sequence all integers from 1 up to (and including) the int given.'
    return (f(k) for k in range(1, n + 1))


def join(items: Iterable, sep: Union[str, Iterable] = ' ') -> Union[str, Dict]:
    '''
    Join iterables using the separator-string given: its 2 arguments
    can come in either order, and are sorted out if needed. When given
    2 non-string iterables, the result is an object whose keys are from
    the first argument, and whose values are from the second one.

    You can use it any of the following ways, where `keys` and `values` are
    sequences (lists, tuples, or generators), and `separator` is a string:

    join(values)
    join(values, separator)
    join(separator, values)
    join(keys, values)
    '''

    if arrayish(items) and arrayish(sep):
        return dict(zip(items, sep))
    if isinstance(items, str):
        # the separator was given first, so swap the arguments
        items, sep = sep, items
    return sep.join(str(item) for item in items)


def json0(x: Any) -> str:
    'Encode value into a minimal single-line JSON string.'
    return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)

j0 = json0


def json2(x: Any) -> str:
    '''
    Encode value into a (possibly multiline) JSON string, using 2 spaces for
    each indentation level.
    '''
    return dumps(x, separators=(',', ': '), allow_nan=False, indent=2)

j2 = json2


def tally(src: Iterable, by: Callable = identity) -> Dict[Any, int]:
    '''
    Count all distinct (transformed) values, the result being a dictionary
    whose keys are all the transformed values, and whose items are positive
    integers. Its 2 arguments can come in either order. When the source is
    a dictionary, the transformer is called with both a key and its value.
    '''

    if callable(src):
        src, by = by, src

    counts: Dict[Any, int] = {}
    if isinstance(src, dict):
        for k, v in src.items():
            dk = by(k, v)
            counts[dk] = counts.get(dk, 0) + 1
    else:
        for v in src:
            dk = by(v)
            counts[dk] = counts.get(dk, 0) + 1
    return counts

tallied = tally


def tobytes(x: Any) -> bytes:
    '''
    Turn a value into a `bytes` value: integers (and booleans) become single
    bytes, which means they must be between 0 and 255, or a ValueError is
    raised; floats become their UTF-8-encoded text; strings are encoded as
    UTF-8; anything else is handed to the `bytes` constructor.
    '''

    if isinstance(x, bytes):
        return x
    if isinstance(x, int):
        # bytes(n) would make n zero-bytes, instead of the single byte
        # which results from iterating/indexing bytes; booleans are ints
        return bytes((int(x),))
    if isinstance(x, float):
        return bytes(str(x), encoding='utf-8')
    if isinstance(x, str):
        return bytes(x, encoding='utf-8')
    return bytes(x)


def tointorbytes(x: Any) -> Union[bytes, int]:
    'Keep integers as given, turning anything else into a `bytes` value.'
    return x if isinstance(x, int) else tobytes(x)


def _needle_len(what: Union[bytes, int]) -> int:
    'Count how many bytes a search-target spans: integers are single bytes.'
    return 1 if isinstance(what, int) else len(what)


def after(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes after the first match, or no bytes when nothing matches.'
    what = tointorbytes(what)
    i = x.find(what)
    return b'' if i < 0 else x[i + _needle_len(what):]


def afterlast(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes after the last match, or no bytes when nothing matches.'
    what = tointorbytes(what)
    i = x.rfind(what)
    return b'' if i < 0 else x[i + _needle_len(what):]

afterfinal = afterlast


def before(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes before the first match, or all bytes when nothing matches.'
    what = tointorbytes(what)
    i = x.find(what)
    return x if i < 0 else x[:i]


def beforelast(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes before the last match, or all bytes when nothing matches.'
    what = tointorbytes(what)
    i = x.rfind(what)
    return x if i < 0 else x[:i]

beforefinal = beforelast


def since(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes from the first match on, or no bytes when nothing matches.'
    what = tointorbytes(what)
    i = x.find(what)
    return b'' if i < 0 else x[i:]


def sincelast(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes from the last match on, or no bytes when nothing matches.'
    what = tointorbytes(what)
    i = x.rfind(what)
    return b'' if i < 0 else x[i:]

sincefinal = sincelast


def until(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes up to the first match, which is included in the result.'
    what = tointorbytes(what)
    i = x.find(what)
    return x if i < 0 else x[:i + _needle_len(what)]


def untillast(x: bytes, what: Union[bytes, int, str]) -> bytes:
    'Get the bytes up to the last match, which is included in the result.'
    what = tointorbytes(what)
    i = x.rfind(what)
    return x if i < 0 else x[:i + _needle_len(what)]

untilfinal = untillast


def utf8(x: bytes) -> str:
    'Decode bytes as a UTF-8 string.'
    return str(x, encoding='utf-8')

toutf8 = utf8
utf8ify = utf8


no_input_opts = (
    '=', '-None', '--None', '-nil', '--nil', '-noinput', '--noinput',
    '-no-input', '--no-input', '-none', '--none', '-null', '--null',
    '-null-input', '--null-input', '-nullinput', '--nullinput', '--n',
)
traceback_opts = (
    '-t', '--t', '-trace', '--trace', '-traceback', '--traceback',
)

args = argv[1:]
load_input = True
trace_exceptions = False
expression = None

# handle all other leading options; the explicit help options are
# handled earlier in the script
while args:
    if args[0] in no_input_opts:
        load_input = False
        args = args[1:]
    elif args[0] in traceback_opts:
        trace_exceptions = True
        args = args[1:]
    else:
        break

# the first arg after the options is the expression: all args after that
# are the names of the inputs to load
if args:
    expression = args[0]
    args = args[1:]

if expression is None:
    print(info.strip(), file=stderr)
    exit(0)


def make_open_read(open: Callable) -> Callable:
    'Restrict the file-open func to a read-only-binary file-open func.'
    def open_readonly(name: str):
        return open(name, mode='rb')
    return open_readonly


def seems_url(s: str) -> bool:
    'Check if a string starts with any of the URI protocols supported.'
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return s.startswith(protocols)


# expressions can only open files for reading, and can't use func exec
open_read = make_open_read(open)
open = open_read

exec = disabled_exec


def adapt_result(x: Any, default: Any) -> Any:
    '''
    Adapt the result of an expression: True stands for the default value
    given, False stands for no result, and callables are called with the
    default value; anything else is kept as given.
    '''

    if x is True:
        return default
    if x is False:
        return None
    if callable(x):
        return x(default)
    return x


def emit_result(w, x: Any) -> None:
    '''
    Write a result as bytes, using the writer given: array-like results
    are emitted item by item, and None results emit nothing.
    '''

    if x is None:
        return

    if arrayish(x):
        for item in x:
            w.write(tobytes(item))
        return

    w.write(tobytes(x))


def eval_expr(expr, using: Any) -> Any:
    'Evaluate the (compiled) expression given, using the input given.'

    global v, val, value, d, dat, data
    # offer several aliases for the variable with the input bytes
    v = val = value = d = dat = data = using
    # the expression comes straight from the command-line of the user
    # running this script, which is why evaluating it is acceptable
    return adapt_result(eval(expr), using)


def _read_input(src: str) -> bytes:
    '''
    Load all bytes from the source given, which is either a dash (which
    stands for the standard input), a URI, or a filepath. Loading happens
    in a func so that no global names available to expressions (such as
    `name` from module unicodedata) are overwritten by loop variables.
    '''

    if src == '-':
        return stdin.buffer.read()

    if seems_url(src):
        from urllib.request import urlopen
        with urlopen(src) as inp:
            return inp.read()

    with open_read(src) as inp:
        return inp.read()


try:
    expression = compile_py(expression, expression, 'eval')

    if args.count('-') > 1:
        raise Exception('can\'t use stdin (-) more than once')

    if load_input:
        # the expression is evaluated once for each named input
        for _src in args:
            emit_result(stdout.buffer, eval_expr(expression, _read_input(_src)))

        # no named inputs means using the standard input
        if len(args) == 0:
            data = stdin.buffer.read()
            emit_result(stdout.buffer, eval_expr(expression, data))
    else:
        emit_result(stdout.buffer, eval(expression))
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
except KeyboardInterrupt:
    exit(2)
except Exception as err:
    if trace_exceptions:
        # a bare raise keeps the original traceback intact
        raise
    # exceptions without a message show their type, instead of nothing
    s = str(err) or type(err).__name__
    print(f'\x1b[31m{s}\x1b[0m', file=stderr)
    exit(1)