File: tlp.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 info = '''
  27 tlp [options...] [python expression] [files/URIs...]
  28 
  29 
  30 Transform Lines with Python runs a python expression on each line of text
  31 input, encoded as UTF-8. Carriage-returns are always ignored in lines, as
  32 well as any UTF-8-BOM on the first line of each input.
  33 
  34 The expression can use either `l` or `line` for the current line, and `i` as
  35 a 0-based line counter, which keeps growing, even across input-sources, when
  36 given more than one.
  37 
  38 Input-sources can be either files or web-URIs. When not given any explicit
  39 named sources, the standard input is used. It's even possible to reuse the
  40 standard input using multiple single dashes (-) in the order needed: stdin
  41 is only read once in this case, and kept for later reuse.
  42 
  43 When the expression results in None, the current input line is ignored. When
  44 the expression results in a boolean, this determines whether the line is
  45 emitted to the standard output, or ignored.
  46 
  47 When the expression emits lists, tuples, or generators, each item is emitted
  48 as its own line/result. Since empty containers emit no lines, these are the
  49 most general type of results, acting as either filters, or input-amplifiers.
  50 
  51 
  52 Examples
  53 
  54 # numbers from 0 to 5, each on its own output line; no input is read/used
  55 tlp = 'range(6)'
  56 
  57 # all powers up to the 4th, using each input line auto-parsed into a `float`
  58 tlp = 'range(1, 6)' | tlp '(float(l)**p for p in range(1, 4+1))'
  59 
  60 # separate input lines with an empty line between each; global var `empty`
  61 # can be used to avoid bothering with nested shell-quoting
  62 tlp = 'range(6)' | tlp '["", l] if i > 0 else l'
  63 
  64 # ignore errors/exceptions, in favor of the original lines/values
  65 tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), line)'
  66 
  67 # ignore errors/exceptions, calling a fallback func with the exception
  68 tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), str)'
  69 
  70 # filtering lines out via None values
  71 head -c 1024 /dev/urandom | strings | tlp 'l if len(l) < 20 else None'
  72 
  73 # boolean-valued results are concise ways to filter lines out
  74 head -c 1024 /dev/urandom | strings | tlp 'len(l) < 20'
  75 
  76 # function/callable results are automatically called on the current line
  77 head -c 1024 /dev/urandom | strings | tlp len
  78 '''
  79 
  80 
  81 from itertools import islice
  82 from json import dumps, loads
  83 compile_py = compile
  84 from re import compile as compile_uncached, IGNORECASE
  85 from sys import argv, exit, stderr, stdin
  86 from time import sleep
  87 from typing import Generator, Iterable
  88 
  89 
# show the help message and quit right away, either when there are no
# arguments at all, or when the only argument is one of the help options
if len(argv) < 2:
    # without any arguments, the help message goes to the standard error
    print(info.strip(), file=stderr)
    exit(0)
if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
    # when explicitly asked for, the help message goes to the standard output
    print(info.strip())
    exit(0)
  96 
  97 
  98 def handle_no_input(expr):
  99     res = eval(expr)
 100     if isinstance(res, (list, range, tuple, Generator)):
 101         for e in res:
 102             if not isinstance(e, Skip):
 103                 print(e, flush=True)
 104         return
 105 
 106     res = adapt_result(res, None)
 107     if not (res is None):
 108         print(res, flush=True)
 109 
 110 
 111 def handle_lines(src, expr):
 112     # `comprehension` expressions seem to ignore local variables: even
 113     # lambda-based workarounds fail
 114     global i, l, line, v, val, value
 115 
 116     i = 0
 117     for e in src:
 118         l = e.rstrip('\r\n').rstrip('\n')
 119         if i == 0:
 120             l = l.lstrip('\xef\xbb\xbf')
 121 
 122         line = l
 123         try:
 124             v = val = value = loads(l)
 125         except Exception:
 126             v = val = value = Skip()
 127         res = eval(expr)
 128         i += 1
 129 
 130         if isinstance(res, (list, range, tuple, Generator)):
 131             for e in res:
 132                 if not isinstance(e, Skip):
 133                     print(e, flush=True)
 134             continue
 135 
 136         res = adapt_result(res, line)
 137         if not (res is None):
 138             print(res, flush=True)
 139 
 140 
 141 def handle_pipe(src, expressions):
 142     # `comprehension` expressions seem to ignore local variables: even
 143     # lambda-based workarounds fail
 144     global i, l, line, v, val, value
 145     # variable names `o` and `p` work like in the `pyp` tool, except
 146     # the pipeline steps were given as separate cmd-line arguments
 147     global o, p
 148 
 149     i = 0
 150     for e in src:
 151         l = e.rstrip('\r\n').rstrip('\n')
 152         if i == 0:
 153             l = l.lstrip('\xef\xbb\xbf')
 154 
 155         line = l
 156         o = p = prev = line
 157         try:
 158             v = val = value = loads(l)
 159         except Exception:
 160             v = val = value = Skip()
 161 
 162         for expr in expressions:
 163             p = eval(expr)
 164             if callable(p):
 165                 p = p(prev)
 166             prev = p
 167 
 168         res = p
 169         i += 1
 170 
 171         if isinstance(res, (list, range, tuple, Generator)):
 172             for e in res:
 173                 if not isinstance(e, Skip):
 174                     print(e, flush=True)
 175             continue
 176 
 177         res = adapt_result(res, line)
 178         if not (res is None):
 179             print(res, flush=True)
 180 
 181 
 182 def hold_lines(src, lines):
 183     for e in src:
 184         lines.append(e)
 185         yield e
 186 
 187 
 188 def adapt_result(res, fallback):
 189     if isinstance(res, Skip):
 190         return res
 191     if res is None or res is False:
 192         return None
 193     if callable(res):
 194         return res(fallback)
 195     if res is True:
 196         return fallback
 197     if isinstance(res, dict):
 198         return dumps(res, allow_nan=False)
 199     return str(res)
 200 
 201 
 202 class Skip:
 203     pass
 204 
 205 
 206 skip = Skip()
 207 
 208 
 209 def chunk(items, chunk_size):
 210     'Break iterable into chunks, each with up to the item-count given.'
 211 
 212     if isinstance(items, str):
 213         n = len(items)
 214         while n >= chunk_size:
 215             yield items[:chunk_size]
 216             items = items[chunk_size:]
 217             n -= chunk_size
 218         if n > 0:
 219             yield items
 220         return
 221 
 222     if not isinstance(chunk_size, int):
 223         raise Exception('non-integer chunk-size')
 224     if chunk_size < 1:
 225         raise Exception('non-positive chunk-size')
 226 
 227     it = iter(items)
 228     while True:
 229         head = tuple(islice(it, chunk_size))
 230         if not head:
 231             return
 232         yield head
 233 
 234 chunked = chunk
 235 
 236 
 237 # re_cache is used by custom func compile to cache previously-compiled
 238 # regular-expressions, which makes them quicker to (re)use in formulas
 239 re_cache = {}
 240 
 241 # ire_cache is like re_cache, except it's for case-insensitive regexes
 242 ire_cache = {}
 243 
 244 
 245 def compile(expr, flags = 0):
 246     'Speed-up using regexes across lines, by avoiding recompilations.'
 247 
 248     if flags != 0 and flags != IGNORECASE:
 249         msg = 'only the default and case-insensitive options are supported'
 250         raise Exception(msg)
 251 
 252     cache = re_cache if flags == 0 else ire_cache
 253     if expr in cache:
 254         return cache[expr]
 255 
 256     pat = compile_uncached(expr, flags)
 257     cache[expr] = pat
 258     return pat
 259 
 260 
 261 def icompile(expr):
 262     return compile(expr, IGNORECASE)
 263 
 264 
 265 def cond(*args):
 266     if len(args) == 0:
 267         return None
 268 
 269     for i, e in enumerate(args):
 270         if i % 2 == 0 and i < len(args) - 1 and e:
 271             return args[i + 1]
 272 
 273     return args[-1] if len(args) % 2 == 1 else None
 274 
 275 
 276 def dive(into, using):
 277     'Depth-first recursive caller for 1-input functions.'
 278 
 279     if callable(into):
 280         into, using = using, into
 281 
 282     def rec(v):
 283         if isinstance(v, dict):
 284             return {k: rec(v) for k, v in v.items()}
 285         if isinstance(v, Iterable) and not isinstance(v, str):
 286             return [rec(v) for v in v]
 287         return using(v)
 288 
 289     return rec(into)
 290 
 291 
 292 def divekeys(into, using):
 293     'Depth-first recursive caller for 2-input funcs which rename dict keys.'
 294 
 295     if callable(into):
 296         into, using = using, into
 297 
 298     def rec(v):
 299         if isinstance(v, dict):
 300             return {using(k): rec(v) for k, v in v.items()}
 301         if isinstance(v, Iterable) and not isinstance(v, str):
 302             return [rec(v) for i, v in enumerate(v)]
 303         return v
 304 
 305     return rec(None, into)
 306 
 307 
 308 def divekv(into, using, using2 = None):
 309     'Depth-first recursive caller for 2-input functions.'
 310 
 311     if using2 is None:
 312         if callable(into):
 313             into, using = using, into
 314     else:
 315         if not callable(using2):
 316             into, using, using2 = using2, into, using
 317 
 318     def rec(k, v):
 319         if isinstance(v, dict):
 320             return {k: rec(k, v) for k, v in v.items()}
 321         if isinstance(v, Iterable) and not isinstance(v, str):
 322             return [rec(i, v) for i, v in enumerate(v)]
 323         return using(k, v)
 324 
 325     def rec2(k, v):
 326         if isinstance(v, dict):
 327             return {str(using(k, v)): rec2(k, v) for k, v in v.items()}
 328         if isinstance(v, Iterable) and not isinstance(v, str):
 329             # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)}
 330             return [rec2(i, v) for i, v in enumerate(v)]
 331         return using2(k, v)
 332 
 333     return rec(None, into) if using2 is None else rec2(None, into)
 334 
 335 kvdive = divekv
 336 
 337 
 338 def drop(src, *what):
 339     if isinstance(src, str):
 340         for s in what:
 341             src = src.replace(s, '')
 342         return src
 343 
 344     def kdrop(src, what):
 345         kv = {}
 346         for k, v in src.items():
 347             if not (k in what):
 348                 kv[k] = v
 349         return kv
 350 
 351     if isinstance(src, dict):
 352         return kdrop(src, set(what))
 353 
 354     if isinstance(src, Iterable):
 355         what = set(what)
 356         return [kdrop(e, what) for e in src]
 357 
 358     return None
 359 
 360 dropped = drop
 361 
 362 
 363 def join(x, y = ' '):
 364     'Join values into a string, or make a dict from keys and values.'
 365 
 366     if isinstance(x, str):
 367         return x.join(str(v) for v in y)
 368     if isinstance(y, str):
 369         return y.join(str(v) for v in x)
 370     return {k: v for k, v in zip(x, y)}
 371 
 372 
 373 def pick(src, *keys):
 374     if isinstance(src, dict):
 375         return {k: src.get(k, None) for k in keys}
 376     return [{k: e.get(k, None) for k in keys} for e in src]
 377 
 378 
 379 def rescue(attempt, fallback = None):
 380     try:
 381         return attempt()
 382     except Exception as e:
 383         if callable(fallback):
 384             return fallback(e)
 385         return fallback
 386 
 387 catch = rescue
 388 catched = rescue
 389 caught = rescue
 390 recover = rescue
 391 recovered = rescue
 392 rescued = rescue
 393 
 394 
 395 def retype(x):
 396     'Try to narrow the type of the value given.'
 397 
 398     if isinstance(x, float):
 399         n = int(x)
 400         return n if float(n) == x else x
 401 
 402     if not isinstance(x, str):
 403         return x
 404 
 405     try:
 406         return loads(x)
 407     except Exception:
 408         pass
 409 
 410     try:
 411         return int(x)
 412     except Exception:
 413         pass
 414 
 415     try:
 416         return float(x)
 417     except Exception:
 418         pass
 419 
 420     return x
 421 
 422 autocast = retype
 423 autocasted = retype
 424 mold = retype
 425 molded = retype
 426 recast = retype
 427 recasted = retype
 428 remold = retype
 429 remolded = retype
 430 retyped = retype
 431 
 432 
 433 def json0(x):
 434     return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)
 435 
 436 j0 = json0
 437 
 438 def jsonl(x):
 439     if isinstance(x, Skip):
 440         return
 441 
 442     def emit(x):
 443         sep = (', ', ': ')
 444         return dumps(x, separators=sep, allow_nan=False, indent=None)
 445 
 446     if x is None:
 447         yield emit(x)
 448         return
 449 
 450     if isinstance(x, (bool, int, float, dict, str)):
 451         yield emit(x)
 452         return
 453 
 454     if isinstance(x, Iterable):
 455         for e in x:
 456             if isinstance(e, Skip):
 457                 continue
 458             yield emit(x)
 459         return
 460 
 461     yield emit(str(x))
 462 
 463 jl = jsonl
 464 jsonlines = jsonl
 465 ndjson = jsonl
 466 
 467 
 468 def typeof(x):
 469     # return str(type(x))
 470     return {
 471         type(None): 'null',
 472         bool: 'boolean',
 473         dict: 'object',
 474         float: 'number',
 475         int: 'number',
 476         str: 'string',
 477         list: 'array',
 478         tuple: 'array',
 479     }.get(type(x), 'other')
 480 
 481 jstype = typeof
 482 
 483 
 484 def wait(seconds, result):
 485     'Wait the given number of seconds, before returning its latter arg.'
 486 
 487     t = (int, float)
 488     if (not isinstance(seconds, t)) and isinstance(result, t):
 489         seconds, result = result, seconds
 490     sleep(seconds)
 491     return result
 492 
 493 delay = wait
 494 
 495 
 496 def after(x, what):
 497     i = x.find(what)
 498     return '' if i < 0 else x[i+len(what):]
 499 
 500 def afterlast(x, what):
 501     i = x.rfind(what)
 502     return '' if i < 0 else x[i+len(what):]
 503 
 504 afterfinal = afterlast
 505 
 506 def before(x, what):
 507     i = x.find(what)
 508     return x if i < 0 else x[:i]
 509 
 510 def beforelast(x, what):
 511     i = x.rfind(what)
 512     return x if i < 0 else x[:i]
 513 
 514 beforefinal = beforelast
 515 
 516 def since(x, what):
 517     i = x.find(what)
 518     return '' if i < 0 else x[i:]
 519 
 520 def sincelast(x, what):
 521     i = x.rfind(what)
 522     return '' if i < 0 else x[i:]
 523 
 524 sincefinal = sincelast
 525 
 526 def until(x, what):
 527     i = x.find(what)
 528     return x if i < 0 else x[:i+len(what)]
 529 
 530 def untilfinal(x, what):
 531     i = x.rfind(what)
 532     return x if i < 0 else x[:i+len(what)]
 533 
 534 untillast = untilfinal
 535 
 536 
 537 def blue(s):
 538     return f'\x1b[38;2;0;95;215m{s}\x1b[0m'
 539 
 540 def blueback(s):
 541     return f'\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m{s}\x1b[0m'
 542 
 543 bluebg = blueback
 544 
 545 def bold(s):
 546     return f'\x1b[1m{s}\x1b[0m'
 547 
 548 bolded = bold
 549 
 550 def gbm(s, good = False, bad = False, meh = False):
 551     '''
 552     Good, Bad, Meh ANSI-styles a plain string via ANSI-style sequences,
 553     according to 1..3 conditions given as boolean(ish) values: these are
 554     checked in order, so the first truish one wins.
 555     '''
 556 
 557     if good:
 558         return green(s)
 559     if bad:
 560         return red(s)
 561     if meh:
 562         return gray(s)
 563     return s
 564 
 565 def gray(s):
 566     return f'\x1b[38;2;168;168;168m{s}\x1b[0m'
 567 
 568 def grayback(s):
 569     return f'\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m{s}\x1b[0m'
 570 
 571 graybg = grayback
 572 
 573 def green(s):
 574     return f'\x1b[38;2;0;135;95m{s}\x1b[0m'
 575 
 576 def greenback(s):
 577     return f'\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m{s}\x1b[0m'
 578 
 579 greenbg = greenback
 580 
 581 def highlight(s):
 582     return f'\x1b[7m{s}\x1b[0m'
 583 
 584 hilite = highlight
 585 
 586 def magenta(s):
 587     return f'\x1b[38;2;215;0;255m{s}\x1b[0m'
 588 
 589 def magentaback(s):
 590     return f'\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m{s}\x1b[0m'
 591 
 592 magback = magentaback
 593 magbg = magentaback
 594 magentabg = magentaback
 595 
 596 def orange(s):
 597     return f'\x1b[38;2;215;95;0m{s}\x1b[0m'
 598 
 599 def orangeback(s):
 600     return f'\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m{s}\x1b[0m'
 601 
 602 orangebg = orangeback
 603 orback = orangeback
 604 orbg = orangeback
 605 
 606 def purple(s):
 607     return f'\x1b[38;2;135;95;255m{s}\x1b[0m'
 608 
 609 def purpleback(s):
 610     return f'\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m{s}\x1b[0m'
 611 
 612 purback = purpleback
 613 purbg = purpleback
 614 purplebg = purpleback
 615 
 616 def red(s):
 617     return f'\x1b[38;2;204;0;0m{s}\x1b[0m'
 618 
 619 def redback(s):
 620     return f'\x1b[38;2;204;0;0m\x1b[38;2;238;238;238m{s}\x1b[0m'
 621 
 622 redbg = redback
 623 
 624 def underline(s):
 625     return f'\x1b[4m{s}\x1b[0m'
 626 
 627 underlined = underline
 628 
 629 
 630 
 631 def fail(msg, code = 1):
 632     print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
 633     exit(code)
 634 
 635 
 636 def make_open_utf8(open):
 637     def open_utf8_readonly(path):
 638         return open(path, encoding='utf-8')
 639     return open_utf8_readonly
 640 
 641 
 642 def message(msg, result = None):
 643     print(msg, file=stderr)
 644     return result
 645 
 646 msg = message
 647 
 648 
 649 def seemsurl(path):
 650     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 651     return any(path.startswith(p) for p in protocols)
 652 
 653 
# named string constants: these are globals, so expressions can use them
# instead of string literals, which avoids nested shell-quoting
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
# NOTE(review): `s` is just another name for the empty string; presumably a
# shortcut for `empty`: confirm the intended use
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

# extra names for None, including the name JSON uses for it
nil = None
none = None
null = None
 670 
 671 
# rebind `exec` to None, so the name no longer refers to the built-in func;
# presumably this is to keep expressions from using it
exec = None
# `open` is replaced by a 1-input wrapper of the built-in func, which always
# asks for the UTF-8 encoding, and which can't be given a file-mode
open_utf8 = make_open_utf8(open)
open = open_utf8

# names of the option to evaluate the expression once, reading no input
no_input_opts = (
    '=', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
# names of the option to import modules, which are named in the argument
# following the option, as a comma-separated list
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
# names of the option to import a preset group of standard-library modules
more_modules_opts = ('-mm', '--mm', '-more', '--more')
# names of the option to treat all later arguments as pipeline steps
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
# names of the option to re-raise exceptions, instead of showing just their
# messages
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')

args = argv[1:]
# the web-related imports only happen when some argument looks like a URI
if any(seemsurl(e) for e in args):
    from io import TextIOWrapper
    from urllib.request import urlopen

# settings which the leading options can change
no_input = False
pipe_mode = False
trace_errors = False
 694 trace_errors = False
 695 
# handle all leading options, removing them from the arguments as they're
# recognized: the loop ends at the first argument which isn't an option
while len(args) > 0:
    if args[0] in no_input_opts:
        no_input = True
        args = args[1:]
        continue

    if args[0] in pipe_opts:
        pipe_mode = True
        args = args[1:]
        # all arguments after the pipe option are pipeline steps, so no
        # more options are looked for
        break

    if args[0] in modules_opts:
        try:
            if len(args) < 2:
                msg = 'a module name or a comma-separated list of modules'
                raise Exception('expected ' + msg)

            # each module is bound as a global variable under the name given
            # for it, so expressions can use it
            g = globals()
            from importlib import import_module
            for e in args[1].split(','):
                g[e] = import_module(e)

            # rebind the helper names to None, now that they're done with
            g = None
            import_module = None
            args = args[2:]
        except Exception as e:
            # fail shows the error message, and quits the script
            fail(e, 1)

        continue

    if args[0] in more_modules_opts:
        # these imports bind global names, since this is top-level code
        import functools, itertools, json, math, random, statistics, string, time
        args = args[1:]
        continue

    if args[0] in trace_opts:
        trace_errors = True
        args = args[1:]
        continue

    break
 737 
 738 
# run the script's main logic: the calls to exit below work from inside the
# try block, since the SystemExit they raise isn't caught by `except
# Exception`
try:
    if pipe_mode:
        if no_input:
            raise Exception('can\'t use pipe-mode when input is disabled')
        # in pipe-mode all remaining arguments are expressions, and lines
        # can only come from the standard input
        exprs = [compile_py(e, e, mode='eval') for e in args]
        # the built-in compile func isn't needed after this; presumably
        # rebinding its name to None also hides it from expressions
        compile_py = None
        handle_pipe(stdin, exprs)
        exit(0)

    # a single dot stands for a missing expression
    expr = '.'
    if len(args) > 0:
        expr = args[0]
        args = args[1:]

    if expr == '.' and no_input:
        # there's nothing to do without both an expression and input
        print(info.strip(), file=stderr)
        exit(0)

    if expr == '.':
        # emit all lines as given
        expr = 'line'

    # the expression's source is also used as its `filename` when compiling
    expr = compile_py(expr, expr, mode='eval')
    compile_py = None

    if no_input:
        handle_no_input(expr)
        exit(0)

    if len(args) == 0:
        # no named inputs means lines come from the standard input
        handle_lines(stdin, expr)
        exit(0)

    # got_stdin is whether the standard input was already read in full, and
    # all_stdin has all its lines, when it's used more than once
    got_stdin = False
    all_stdin = None
    dashes = args.count('-')

    for path in args:
        if path == '-':
            if dashes > 1:
                if not got_stdin:
                    # remember the lines while handling them the first time
                    all_stdin = []
                    handle_lines(hold_lines(stdin, all_stdin), expr)
                    got_stdin = True
                else:
                    handle_lines(all_stdin, expr)
            else:
                # a single dash needs no lines remembered for later
                handle_lines(stdin, expr)
            continue

        if seemsurl(path):
            # urlopen and TextIOWrapper were imported earlier, when any of
            # the arguments looked like a URI
            with urlopen(path) as inp:
                with TextIOWrapper(inp, encoding='utf-8') as txt:
                    handle_lines(txt, expr)
            continue

        with open_utf8(path) as txt:
            handle_lines(txt, expr)
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.close()
    exit(0)
except KeyboardInterrupt:
    # stderr.close()
    exit(2)
except Exception as e:
    if trace_errors:
        # re-raising lets python show the full traceback
        raise e
    else:
        fail(e, 1)
 807         fail(e, 1)