#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tlp [options...] [python expression] [files/URIs...]


Transform Lines with Python runs a python expression on each line of text
input, encoded as UTF-8. Carriage-returns are always ignored in lines, as
well as any UTF-8-BOM on the first line of each input.

The expression can use either `l` or `line` for the current line, and `i` as
a 0-based line counter which keeps growing even across input-sources, when
given multiple inputs. Also available is `n`, a 1-based line counter which
otherwise works the same way.

Each line is automatically parsed as JSON: when successful, the parsed line
is available to the expression as `v`, or `value`, with value `err` set to
None, since parsing succeeded; on failure, `v` and `value` are set to a
special skip-marker, which emits nothing when used as a result, while `err`
has the exception as a value. You can check success/failure by checking if
`err` is None, or not.

Input-sources can be either files or web-URIs. When not given any explicit
named sources, the standard input is used. It's even possible to reuse the
standard input using multiple single dashes (-) in the order needed: stdin
is only read once in this case, and kept for later reuse.

When the expression results in None, the current input line is ignored. When
the expression results in a boolean, its value determines whether each line
is emitted to the standard output, or ignored.

When the expression emits lists, tuples, or generators, each item is emitted
as its own line/result. Since empty containers emit no lines, these are the
most general type of results, acting as either filters, or input-amplifiers.


Examples

# numbers from 0 to 5, each on its own output line; no input is read/used
tlp = 'range(6)'

# all powers up to the 4th, using each input line auto-parsed into a `float`
tlp = 'range(1, 6)' | tlp '(v**p for p in range(1, 4+1))'

# separate input lines with an empty line between each; global var `empty`
# can be used to avoid bothering with nested shell-quoting
tlp = 'range(6)' | tlp '["", l] if i > 0 else l'

# ignore errors/exceptions, in favor of the original lines/values
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), line)'

# ignore errors/exceptions, calling a fallback func with the exception
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), str)'

# filtering lines out via None values
head -c 1024 /dev/urandom | strings | tlp 'l if len(l) < 20 else None'

# boolean-valued results are concise ways to filter lines out
head -c 1024 /dev/urandom | strings | tlp 'len(l) < 20'

# function/callable results are automatically called on the current line
head -c 1024 /dev/urandom | strings | tlp len

# emit 10 random integers between 1 and 10
tlp -m random = '(random.randint(1, 10) for _ in range(10))'

# emit documentation for collections.defaultdict from the python stdlib
tlp = -m collections 'help(collections.defaultdict)' | cat
'''


from itertools import islice
from json import dumps, loads
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin
from time import sleep
from typing import Generator, Iterable


# line counters available to expressions as `i` (0-based) and `n` (1-based):
# they live at module level and are never reset by the line handlers, so
# they keep growing across input-sources, as the help message promises
i = 0
n = 1


def strip_bom(s):
    '''
    Remove a leading UTF-8 byte-order mark from the string given, if any.

    Both the properly-decoded form (U+FEFF) and the 3-char form resulting
    from decoding the BOM bytes as latin-1 are handled. Only an exact
    prefix is removed: `str.lstrip` isn't used, since it treats its arg
    as a set of characters, which would also eat legit leading text.
    '''

    for bom in ('\ufeff', '\xef\xbb\xbf'):
        if s.startswith(bom):
            return s[len(bom):]
    return s

def emit_result(res, fallback):
    '''
    Print an expression result to the standard output, flushing each line.

    Lists, ranges, tuples, and generators have each item adapted/printed
    on its own line; any other result is adapted using the fallback given.
    Results adapted into None emit nothing.
    '''

    if isinstance(res, (list, range, tuple, Generator)):
        # a dedicated local is used for the items, so the global error
        # alias `e` stays intact while lazy generators are being consumed
        for item in res:
            item = adapt_result(item, None)
            if item is not None:
                print(item, flush=True)
        return

    res = adapt_result(res, fallback)
    if res is not None:
        print(res, flush=True)

def handle_no_input(expr):
    'Evaluate the compiled expression once, emitting its result(s).'

    # NOTE: the expression comes straight from the command line, and
    # evaluating it is the whole point of this tool
    emit_result(eval(expr), None)

def handle_lines(src, expr):
    '''
    Evaluate the compiled expression for each line from the source given,
    emitting the results to the standard output.

    Before each evaluation, globals `l`/`line` are set to the current line,
    without its line-ending; `v`/`val`/`value` to the JSON-parsed line, or
    to a Skip instance on failure; `e`/`err`/`error` to None, or to the
    JSON-parsing exception. Counters `i` and `n` are incremented right
    after each evaluation, and are never reset.
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, n, l, line, v, val, value, e, err, error

    e = err = error = None
    # a dedicated flag detects the first line of each source, since the
    # line counters keep growing across sources
    _first = True

    for l in src:
        l = l.rstrip('\r\n')
        if _first:
            l = strip_bom(l)
            _first = False

        line = l
        try:
            e = err = error = None
            v = val = value = loads(l)
        except Exception as ex:
            e = err = error = ex
            v = val = value = Skip()

        res = eval(expr)
        i += 1
        n += 1
        emit_result(res, line)

def handle_pipe(src, funcs):
    '''
    Run each line from the source given through the pipeline of funcs,
    emitting the final results to the standard output.

    Each step is called with the previous step's result, starting with
    the current line; when a step results in a callable, that's called
    with the value the step started from. Globals are set as in func
    `handle_lines`, with `o` also set to the current line.
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, n, l, line, v, val, value, e, err, error
    # variable names `o` and `p` work like in the `pyp` tool, except
    # the pipeline steps were given as separate cmd-line arguments
    global o, p

    e = err = error = None
    _first = True

    for l in src:
        l = l.rstrip('\r\n')
        if _first:
            l = strip_bom(l)
            _first = False

        line = l
        o = p = prev = line
        try:
            e = err = error = None
            v = val = value = loads(l)
        except Exception as ex:
            e = err = error = ex
            v = val = value = Skip()

        for f in funcs:
            p = f(p)
            if callable(p):
                p = p(prev)
            prev = p

        res = p
        i += 1
        n += 1
        emit_result(res, line)

def hold_lines(src, lines):
    'Yield all items from the source, also appending them to the list given.'

    for e in src:
        lines.append(e)
        yield e

def adapt_result(res, fallback):
    '''
    Turn an expression result into what to print, or None to print nothing.

    Exceptions are raised; Skip instances, None, and False result in None;
    callables are called with the fallback, their result returned as is;
    True results in the fallback; dicts become JSON; anything else is
    turned into a string.
    '''

    if isinstance(res, BaseException):
        raise res
    if isinstance(res, Skip) or res is None or res is False:
        return None
    if callable(res):
        return res(fallback)
    if res is True:
        return fallback
    if isinstance(res, dict):
        return dumps(res, allow_nan=False)
    return str(res)

def fail(msg, code = 1):
    'Show the message given in red on stderr, then quit with the code given.'

    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)

def make_open_utf8(open):
    'Wrap the `open` func given into a 1-arg func which reads UTF-8 text.'

    def open_utf8_readonly(path):
        return open(path, encoding='utf-8')
    return open_utf8_readonly

def seemsurl(path):
    'Check whether a string starts with any of the supported URI schemes.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)

class Skip:
    'Marker type for results which must emit nothing.'
    pass

skip = Skip()

def chunk(items, chunk_size):
    '''
    Break iterable into chunks, each with up to the item-count given.

    Strings are chunked into substrings, anything else into tuples. An
    exception is raised when the chunk-size isn't a positive integer.
    '''

    # validate before handling strings, since a non-positive size would
    # otherwise make the string case loop forever
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk

# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

def re_compile(expr, flags = 0):
    'Speed-up using regexes across lines, by avoiding recompilations.'

    cache = re_cache.setdefault(flags, {})
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat

def icompile(expr):
    'Compile/cache a case-insensitive regular expression.'
    return re_compile(expr, IGNORECASE)

def cond(*args):
    '''
    Pick the result following the first truish condition, among pairs of
    conditions and results: a final unpaired arg is the fallback result,
    which is None when missing.
    '''

    for test, result in zip(args[0::2], args[1::2]):
        if test:
            return result
    return args[-1] if len(args) % 2 == 1 else None

def dive(into, using):
    '''
    Depth-first recursive caller for 1-input functions.

    The func is called on all non-container values, keeping the shape of
    dicts, and turning all other non-string iterables into lists. The 2
    args can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(x) for x in v]
        return using(v)

    return rec(into)

def divekeys(into, using):
    '''
    Depth-first recursive caller for 1-input funcs which rename dict keys.

    Values are kept as they are, except non-string iterables, which are
    turned into lists. The 2 args can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(x) for x in v]
        return v

    return rec(into)

def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions.

    Funcs are called with the key, or the list index, along with each
    value: the top-level value has None as its key. With a single func,
    all non-container values are transformed; with 2 funcs, the 1st one
    renames dict keys, turning its results into strings, while the 2nd
    one transforms non-container values. The data can be given either as
    the first arg, or as the last one.
    '''

    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {key: rec(key, x) for key, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(pos, x) for pos, x in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(key, x)): rec2(key, x) for key, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(pos, x) for pos, x in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv

def drop(src, *what):
    '''
    Remove substrings from a string, or keys from a dict, or keys from
    all dicts in an iterable, ignoring its non-dict items. Anything else
    results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for (k, v) in src.items() if k not in what}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src if isinstance(e, dict)]

    return None

dropped = drop

def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))

def pick(src, *keys):
    '''
    Keep only the keys given from a dict, or from all dicts in an iterable,
    ignoring its non-dict items: missing keys get None as their values.
    '''

    if isinstance(src, dict):
        return {k: src.get(k) for k in keys}
    return [{k: e.get(k) for k in keys} for e in src if isinstance(e, dict)]

picked = pick

def rescue(attempt, fallback = None):
    '''
    Call the 0-input func given, returning its result: on exceptions, the
    result is the fallback, or the result of calling it with the exception,
    when the fallback is callable.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
recover = rescue
rescued = rescue

def retype(x):
    '''
    Try to narrow the type of the value given.

    Whole-number floats become ints; strings are tried, in order, as JSON,
    as ints, and as floats; anything else is returned as is.
    '''

    if isinstance(x, float):
        # infinities and NaNs can't become ints, so those are kept as is
        return int(x) if x.is_integer() else x

    if not isinstance(x, str):
        return x

    for convert in (loads, int, float):
        try:
            return convert(x)
        except Exception:
            # failing a conversion just means trying the next one
            continue

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype

def json0(x):
    'Turn a value into a single line of JSON, using no optional spaces.'
    return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)

j0 = json0

def jsonl(x):
    '''
    Emit JSON Lines: non-string iterables other than dicts emit a line for
    each of their items, while other values emit a single line. Skip values
    emit nothing, both on their own and as items.
    '''

    if isinstance(x, Skip):
        return

    def emit(x):
        return dumps(x, separators=(', ', ': '), allow_nan=False, indent=None)

    if x is None:
        yield emit(x)
        return

    if isinstance(x, (bool, int, float, dict, str)):
        yield emit(x)
        return

    if isinstance(x, Iterable):
        for e in x:
            if isinstance(e, Skip):
                continue
            # each item gets its own line, instead of the whole container
            yield emit(e)
        return

    yield emit(str(x))

jl = jsonl
jsonlines = jsonl
ndjson = jsonl

def typeof(x):
    'Name the JSON type of a value, or `other` when there is none.'

    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof

def wait(seconds, result):
    'Wait the given number of seconds, before returning its latter arg.'

    t = (int, float)
    # allow giving the args in either order, when only one is a number
    if (not isinstance(seconds, t)) and isinstance(result, t):
        seconds, result = result, seconds
    sleep(seconds)
    return result

delay = wait

def after(x, what):
    'Get what follows the first match: the result is empty with no match.'
    i = x.find(what)
    return '' if i < 0 else x[i+len(what):]

def afterlast(x, what):
    'Get what follows the last match: the result is empty with no match.'
    i = x.rfind(what)
    return '' if i < 0 else x[i+len(what):]

afterfinal = afterlast

def before(x, what):
    'Get what precedes the first match: no match keeps the whole string.'
    i = x.find(what)
    return x if i < 0 else x[:i]

def beforelast(x, what):
    'Get what precedes the last match: no match keeps the whole string.'
    i = x.rfind(what)
    return x if i < 0 else x[:i]

beforefinal = beforelast

def since(x, what):
    'Get all from the first match on: the result is empty with no match.'
    i = x.find(what)
    return '' if i < 0 else x[i:]

def sincelast(x, what):
    'Get all from the last match on: the result is empty with no match.'
    i = x.rfind(what)
    return '' if i < 0 else x[i:]

sincefinal = sincelast

def until(x, what):
    'Get all up to the first match, included: no match keeps everything.'
    i = x.find(what)
    return x if i < 0 else x[:i+len(what)]

def untilfinal(x, what):
    'Get all up to the last match, included: no match keeps everything.'
    i = x.rfind(what)
    return x if i < 0 else x[:i+len(what)]

untillast = untilfinal

# the funcs which follow ANSI-style the values given: the `back` variants
# set a background color, along with a fixed light-gray foreground

def blue(s):
    return f'\x1b[38;2;0;95;215m{s}\x1b[0m'

def blueback(s):
    return f'\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m{s}\x1b[0m'

bluebg = blueback

def bold(s):
    return f'\x1b[1m{s}\x1b[0m'

bolded = bold

def gbm(s, good = False, bad = False, meh = False):
    '''
    Good, Bad, Meh ANSI-styles a plain string via ANSI-style sequences,
    according to 1..3 conditions given as boolean(ish) values: these are
    checked in order, so the first truish one wins.
    '''

    if good:
        return green(s)
    if bad:
        return red(s)
    if meh:
        return gray(s)
    return s

def gray(s):
    return f'\x1b[38;2;168;168;168m{s}\x1b[0m'

def grayback(s):
    return f'\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m{s}\x1b[0m'

graybg = grayback

def green(s):
    return f'\x1b[38;2;0;135;95m{s}\x1b[0m'

def greenback(s):
    return f'\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m{s}\x1b[0m'

greenbg = greenback

def highlight(s):
    return f'\x1b[7m{s}\x1b[0m'

hilite = highlight

def magenta(s):
    return f'\x1b[38;2;215;0;255m{s}\x1b[0m'

def magentaback(s):
    return f'\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

magback = magentaback
magbg = magentaback
magentabg = magentaback

def orange(s):
    return f'\x1b[38;2;215;95;0m{s}\x1b[0m'

def orangeback(s):
    return f'\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

orangebg = orangeback
orback = orangeback
orbg = orangeback

def purple(s):
    return f'\x1b[38;2;135;95;255m{s}\x1b[0m'

def purpleback(s):
    return f'\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

purback = purpleback
purbg = purpleback
purplebg = purpleback

def red(s):
    return f'\x1b[38;2;204;0;0m{s}\x1b[0m'

def redback(s):
    # code 48 sets the background color, like all other `back` funcs do
    return f'\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

redbg = redback

def underline(s):
    return f'\x1b[4m{s}\x1b[0m'

underlined = underline

def message(msg, result = None):
    'Show a message on stderr, returning the other arg given.'
    print(msg, file=stderr)
    return result

msg = message

seen = set()
def once(x):
    'Return each distinct (hashable) value the first time, and None later.'

    if x in seen:
        return None
    seen.add(x)
    return x

def utf8(x):
    'Turn strings/bytes into a UTF-8-valid string, or None on failure.'

    try:
        if isinstance(x, str):
            x = x.encode('utf-8')
        return str(x, 'utf-8')
    except Exception:
        return None


cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
space = ' '
squo = '\''
squote = '\''
tab = '\t'

nil = None
none = None
null = None


# hide `exec` from expressions, and make `open` read UTF-8 text
exec = None
open_utf8 = make_open_utf8(open)
open = open_utf8

no_input_opts = (
    '=', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')


# the script's logic stays at module level, instead of inside a `main` func,
# since it rebinds global names such as `compile`, `eval`, and `open`, to
# hide them from expressions, and since expressions are evaluated using the
# module's global variables
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)

    args = argv[1:]
    # only pay for the URI-related imports when they're actually needed
    if any(seemsurl(e) for e in args):
        from io import TextIOWrapper
        from urllib.request import urlopen

    no_input = False
    pipe_mode = False
    trace_errors = False

    while len(args) > 0:
        if args[0] == '--':
            args = args[1:]
            break

        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    expected = 'a module name or a comma-separated list of modules'
                    raise Exception('expected ' + expected)

                g = globals()
                from importlib import import_module
                for mod_name in args[1].split(','):
                    g[mod_name] = import_module(mod_name)
                    # like `import a.b` does, also bind top-level package
                    # `a`, so expressions can reach dotted modules by name
                    pkg = mod_name.partition('.')[0]
                    g[pkg] = import_module(pkg)

                # don't leave these helpers around for expressions
                g = None
                import_module = None
                expected = mod_name = pkg = None
                args = args[2:]
            except Exception as ex:
                fail(ex, 1)

            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        break

    try:
        if pipe_mode:
            if no_input:
                raise Exception('can\'t use pipe-mode when input is disabled')
            steps = [eval(s) for s in args]
            compile = None
            eval = None
            exec = None
            open = None
            handle_pipe(stdin, steps)
            exit(0)

        expr = '.'
        if len(args) > 0:
            expr = args[0]
            args = args[1:]

        if expr == '.' and no_input:
            print(info.strip(), file=stderr)
            exit(0)

        if expr == '.':
            expr = 'line'

        expr = compile(expr, expr, mode='eval')
        compile = None

        if no_input:
            handle_no_input(expr)
            exit(0)

        if len(args) == 0:
            handle_lines(stdin, expr)
            exit(0)

        got_stdin = False
        all_stdin = None
        dashes = args.count('-')

        for path in args:
            if path == '-':
                if dashes > 1:
                    # stdin is used multiple times: remember its lines the
                    # first time around, and replay them all later times
                    if not got_stdin:
                        all_stdin = []
                        handle_lines(hold_lines(stdin, all_stdin), expr)
                        got_stdin = True
                    else:
                        handle_lines(all_stdin, expr)
                else:
                    handle_lines(stdin, expr)
                continue

            if seemsurl(path):
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        handle_lines(txt, expr)
                continue

            with open_utf8(path) as txt:
                handle_lines(txt, expr)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as ex:
        if trace_errors:
            raise
        else:
            fail(ex, 1)