File: tlp.py 1 #!/usr/bin/python3 2 3 # The MIT License (MIT) 4 # 5 # Copyright © 2020-2025 pacman64 6 # 7 # Permission is hereby granted, free of charge, to any person obtaining a copy 8 # of this software and associated documentation files (the “Software”), to deal 9 # in the Software without restriction, including without limitation the rights 10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 # copies of the Software, and to permit persons to whom the Software is 12 # furnished to do so, subject to the following conditions: 13 # 14 # The above copyright notice and this permission notice shall be included in 15 # all copies or substantial portions of the Software. 16 # 17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 # SOFTWARE. 24 25 26 info = ''' 27 tlp [options...] [python expression] [files/URIs...] 28 29 30 Transform Lines with Python runs a python expression on each line of text 31 input, encoded as UTF-8. Carriage-returns are always ignored in lines, as 32 well as any UTF-8-BOM on the first line of each input. 33 34 The expression can use either `l` or `line` for the current line, and `i` as 35 a 0-based line counter which keeps growing even across input-sources, when 36 given multiple inputs. 37 38 Input-sources can be either files or web-URIs. When not given any explicit 39 named sources, the standard input is used. It's even possible to reuse the 40 standard input using multiple single dashes (-) in the order needed: stdin 41 is only read once in this case, and kept for later reuse. 42 43 When the expression results in None, the current input line is ignored. When 44 the expression results in a boolean, this determines whether the line is 45 emitted to the standard output, or ignored. 46 47 When the expression emits lists, tuples, or generators, each item is emitted 48 as its own line/result. Since empty containers emit no lines, these are the 49 most general type of results, acting as either filters, or input-amplifiers. 50 51 52 Examples 53 54 # numbers from 0 to 5, each on its own output line; no input is read/used 55 tlp = 'range(6)' 56 57 # all powers up to the 4th, using each input line auto-parsed into a `float` 58 tlp = 'range(1, 6)' | tlp '(float(l)**p for p in range(1, 4+1))' 59 60 # separate input lines with an empty line between each; global var `empty` 61 # can be used to avoid bothering with nested shell-quoting 62 tlp = 'range(6)' | tlp '["", l] if i > 0 else l' 63 64 # ignore errors/exceptions, in favor of the original lines/values 65 tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), line)' 66 67 # ignore errors/exceptions, calling a fallback func with the exception 68 tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), str)' 69 70 # filtering lines out via None values 71 head -c 1024 /dev/urandom | strings | tlp 'l if len(l) < 20 else None' 72 73 # boolean-valued results are concise ways to filter lines out 74 head -c 1024 /dev/urandom | strings | tlp 'len(l) < 20' 75 76 # function/callable results are automatically called on the current line 77 head -c 1024 /dev/urandom | strings | tlp len 78 ''' 79 80 81 from itertools import islice 82 from json import dumps, loads 83 compile_py = compile 84 from re import compile as compile_uncached, IGNORECASE 85 from sys import argv, exit, stderr, stdin 86 from time import sleep 87 from typing import Generator, Iterable 88 89 90 if len(argv) < 2: 91 print(info.strip(), file=stderr) 92 exit(0) 93 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'): 94 print(info.strip()) 95 exit(0) 96 97 98 def handle_no_input(expr): 99 res = eval(expr) 100 if isinstance(res, (list, range, tuple, Generator)): 101 for e in res: 102 e = adapt_result(e, None) 103 if not (e is None): 104 print(e, flush=True) 105 return 106 107 res = adapt_result(res, None) 108 if not (res is None): 109 print(res, flush=True) 110 111 112 def handle_lines(src, expr): 113 # `comprehension` expressions seem to ignore local variables: even 114 # lambda-based workarounds fail 115 global i, l, line, v, val, value, e, err, error 116 117 i = 0 118 e = err = error = None 119 120 for l in src: 121 l = l.rstrip('\r\n').rstrip('\n') 122 if i == 0: 123 l = l.lstrip('\xef\xbb\xbf') 124 125 line = l 126 try: 127 e = err = error = None 128 v = val = value = loads(l) 129 except Exception as ex: 130 e = err = error = ex 131 v = val = value = Skip() 132 res = eval(expr) 133 i += 1 134 135 if isinstance(res, (list, range, tuple, Generator)): 136 for e in res: 137 e = adapt_result(e, None) 138 if not (e is None): 139 print(e, flush=True) 140 continue 141 142 res = adapt_result(res, line) 143 if not (res is None): 144 print(res, flush=True) 145 146 147 def handle_pipe(src, expressions): 148 # `comprehension` expressions seem to ignore local variables: even 149 # lambda-based workarounds fail 150 global i, l, line, v, val, value, e, err, error 151 # variable names `o` and `p` work like in the `pyp` tool, except 152 # the pipeline steps were given as separate cmd-line arguments 153 global o, p 154 155 i = 0 156 e = err = error = None 157 158 for l in src: 159 l = l.rstrip('\r\n').rstrip('\n') 160 if i == 0: 161 l = l.lstrip('\xef\xbb\xbf') 162 163 line = l 164 o = p = prev = line 165 try: 166 e = err = error = None 167 v = val = value = loads(l) 168 except Exception as ex: 169 e = err = error = ex 170 v = val = value = Skip() 171 172 for expr in expressions: 173 p = eval(expr) 174 if callable(p): 175 p = p(prev) 176 prev = p 177 178 res = p 179 i += 1 180 181 if isinstance(res, (list, range, tuple, Generator)): 182 for e in res: 183 e = adapt_result(e, None) 184 if not (e is None): 185 print(e, flush=True) 186 continue 187 188 res = adapt_result(res, line) 189 if not (res is None): 190 print(res, flush=True) 191 192 193 def hold_lines(src, lines): 194 for e in src: 195 lines.append(e) 196 yield e 197 198 199 def adapt_result(res, fallback): 200 if isinstance(res, BaseException): 201 raise res 202 if isinstance(res, Skip) or res is None or res is False: 203 return None 204 if callable(res): 205 return res(fallback) 206 if res is True: 207 return fallback 208 if isinstance(res, dict): 209 return dumps(res, allow_nan=False) 210 return str(res) 211 212 213 def fail(msg, code = 1): 214 print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr) 215 exit(code) 216 217 218 def make_open_utf8(open): 219 def open_utf8_readonly(path): 220 return open(path, encoding='utf-8') 221 return open_utf8_readonly 222 223 def seemsurl(path): 224 protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:') 225 return any(path.startswith(p) for p in protocols) 226 227 228 class Skip: 229 pass 230 231 232 skip = Skip() 233 234 235 def chunk(items, chunk_size): 236 'Break iterable into chunks, each with up to the item-count given.' 237 238 if isinstance(items, str): 239 n = len(items) 240 while n >= chunk_size: 241 yield items[:chunk_size] 242 items = items[chunk_size:] 243 n -= chunk_size 244 if n > 0: 245 yield items 246 return 247 248 if not isinstance(chunk_size, int): 249 raise Exception('non-integer chunk-size') 250 if chunk_size < 1: 251 raise Exception('non-positive chunk-size') 252 253 it = iter(items) 254 while True: 255 head = tuple(islice(it, chunk_size)) 256 if not head: 257 return 258 yield head 259 260 chunked = chunk 261 262 # re_cache is used by custom func compile to cache previously-compiled 263 # regular-expressions, which makes them quicker to (re)use in formulas 264 re_cache = {} 265 266 # ire_cache is like re_cache, except it's for case-insensitive regexes 267 ire_cache = {} 268 269 def compile(expr, flags = 0): 270 'Speed-up using regexes across lines, by avoiding recompilations.' 271 272 if flags != 0 and flags != IGNORECASE: 273 msg = 'only the default and case-insensitive options are supported' 274 raise Exception(msg) 275 276 cache = re_cache if flags == 0 else ire_cache 277 if expr in cache: 278 return cache[expr] 279 280 pat = compile_uncached(expr, flags) 281 cache[expr] = pat 282 return pat 283 284 def icompile(expr): 285 return compile(expr, IGNORECASE) 286 287 def cond(*args): 288 if len(args) == 0: 289 return None 290 291 for i, e in enumerate(args): 292 if i % 2 == 0 and i < len(args) - 1 and e: 293 return args[i + 1] 294 295 return args[-1] if len(args) % 2 == 1 else None 296 297 def dive(into, using): 298 'Depth-first recursive caller for 1-input functions.' 299 300 if callable(into): 301 into, using = using, into 302 303 def rec(v): 304 if isinstance(v, dict): 305 return {k: rec(v) for k, v in v.items()} 306 if isinstance(v, Iterable) and not isinstance(v, str): 307 return [rec(v) for v in v] 308 return using(v) 309 310 return rec(into) 311 312 def divekeys(into, using): 313 'Depth-first recursive caller for 2-input funcs which rename dict keys.' 314 315 if callable(into): 316 into, using = using, into 317 318 def rec(v): 319 if isinstance(v, dict): 320 return {using(k): rec(v) for k, v in v.items()} 321 if isinstance(v, Iterable) and not isinstance(v, str): 322 return [rec(v) for i, v in enumerate(v)] 323 return v 324 325 return rec(None, into) 326 327 def divekv(into, using, using2 = None): 328 'Depth-first recursive caller for 2-input functions.' 329 330 if using2 is None: 331 if callable(into): 332 into, using = using, into 333 else: 334 if not callable(using2): 335 into, using, using2 = using2, into, using 336 337 def rec(k, v): 338 if isinstance(v, dict): 339 return {k: rec(k, v) for k, v in v.items()} 340 if isinstance(v, Iterable) and not isinstance(v, str): 341 return [rec(i, v) for i, v in enumerate(v)] 342 return using(k, v) 343 344 def rec2(k, v): 345 if isinstance(v, dict): 346 return {str(using(k, v)): rec2(k, v) for k, v in v.items()} 347 if isinstance(v, Iterable) and not isinstance(v, str): 348 # return {str(using(i, v)): rec2(i, v) for i, v in enumerate(v)} 349 return [rec2(i, v) for i, v in enumerate(v)] 350 return using2(k, v) 351 352 return rec(None, into) if using2 is None else rec2(None, into) 353 354 kvdive = divekv 355 356 def drop(src, *what): 357 if isinstance(src, str): 358 for s in what: 359 src = src.replace(s, '') 360 return src 361 362 def kdrop(src, what): 363 return {k: v for (k, v) in src.items() if not (k in what)} 364 365 if isinstance(src, dict): 366 return kdrop(src, set(what)) 367 368 if isinstance(src, Iterable): 369 what = set(what) 370 return [kdrop(e, what) for e in src if isinstance(e, dict)] 371 372 return None 373 374 dropped = drop 375 376 def join(x, y = ' '): 377 'Join values into a string, or make a dict from keys and values.' 378 379 if isinstance(x, str): 380 return x.join(str(v) for v in y) 381 if isinstance(y, str): 382 return y.join(str(v) for v in x) 383 return {k: v for k, v in zip(x, y)} 384 385 def pick(src, *keys): 386 if isinstance(src, dict): 387 return {k: src.get(k, None) for k in keys} 388 return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)] 389 390 picked = pick 391 392 def rescue(attempt, fallback = None): 393 try: 394 return attempt() 395 except Exception as e: 396 if callable(fallback): 397 return fallback(e) 398 return fallback 399 400 catch = rescue 401 recover = rescue 402 rescued = rescue 403 404 def retype(x): 405 'Try to narrow the type of the value given.' 406 407 if isinstance(x, float): 408 n = int(x) 409 return n if float(n) == x else x 410 411 if not isinstance(x, str): 412 return x 413 414 try: 415 return loads(x) 416 except Exception: 417 pass 418 419 try: 420 return int(x) 421 except Exception: 422 pass 423 424 try: 425 return float(x) 426 except Exception: 427 pass 428 429 return x 430 431 autocast = retype 432 autocasted = retype 433 mold = retype 434 molded = retype 435 recast = retype 436 recasted = retype 437 remold = retype 438 remolded = retype 439 retyped = retype 440 441 def json0(x): 442 return dumps(x, separators=(',', ':'), allow_nan=False, indent=None) 443 444 j0 = json0 445 446 def jsonl(x): 447 if isinstance(x, Skip): 448 return 449 450 def emit(x): 451 return dumps(x, separators=(', ', ': '), allow_nan=False, indent=None) 452 453 if x is None: 454 yield emit(x) 455 return 456 457 if isinstance(x, (bool, int, float, dict, str)): 458 yield emit(x) 459 return 460 461 if isinstance(x, Iterable): 462 for e in x: 463 if isinstance(e, Skip): 464 continue 465 yield emit(x) 466 return 467 468 yield emit(str(x)) 469 470 jl = jsonl 471 jsonlines = jsonl 472 ndjson = jsonl 473 474 def typeof(x): 475 # return str(type(x)) 476 return { 477 type(None): 'null', 478 bool: 'boolean', 479 dict: 'object', 480 float: 'number', 481 int: 'number', 482 str: 'string', 483 list: 'array', 484 tuple: 'array', 485 }.get(type(x), 'other') 486 487 jstype = typeof 488 489 def wait(seconds, result): 490 'Wait the given number of seconds, before returning its latter arg.' 491 492 t = (int, float) 493 if (not isinstance(seconds, t)) and isinstance(result, t): 494 seconds, result = result, seconds 495 sleep(seconds) 496 return result 497 498 delay = wait 499 500 def after(x, what): 501 i = x.find(what) 502 return '' if i < 0 else x[i+len(what):] 503 504 def afterlast(x, what): 505 i = x.rfind(what) 506 return '' if i < 0 else x[i+len(what):] 507 508 afterfinal = afterlast 509 510 def before(x, what): 511 i = x.find(what) 512 return x if i < 0 else x[:i] 513 514 def beforelast(x, what): 515 i = x.rfind(what) 516 return x if i < 0 else x[:i] 517 518 beforefinal = beforelast 519 520 def since(x, what): 521 i = x.find(what) 522 return '' if i < 0 else x[i:] 523 524 def sincelast(x, what): 525 i = x.rfind(what) 526 return '' if i < 0 else x[i:] 527 528 sincefinal = sincelast 529 530 def until(x, what): 531 i = x.find(what) 532 return x if i < 0 else x[:i+len(what)] 533 534 def untilfinal(x, what): 535 i = x.rfind(what) 536 return x if i < 0 else x[:i+len(what)] 537 538 untillast = untilfinal 539 540 def blue(s): 541 return f'\x1b[38;2;0;95;215m{s}\x1b[0m' 542 543 def blueback(s): 544 return f'\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m{s}\x1b[0m' 545 546 bluebg = blueback 547 548 def bold(s): 549 return f'\x1b[1m{s}\x1b[0m' 550 551 bolded = bold 552 553 def gbm(s, good = False, bad = False, meh = False): 554 ''' 555 Good, Bad, Meh ANSI-styles a plain string via ANSI-style sequences, 556 according to 1..3 conditions given as boolean(ish) values: these are 557 checked in order, so the first truish one wins. 558 ''' 559 560 if good: 561 return green(s) 562 if bad: 563 return red(s) 564 if meh: 565 return gray(s) 566 return s 567 568 def gray(s): 569 return f'\x1b[38;2;168;168;168m{s}\x1b[0m' 570 571 def grayback(s): 572 return f'\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m{s}\x1b[0m' 573 574 graybg = grayback 575 576 def green(s): 577 return f'\x1b[38;2;0;135;95m{s}\x1b[0m' 578 579 def greenback(s): 580 return f'\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m{s}\x1b[0m' 581 582 greenbg = greenback 583 584 def highlight(s): 585 return f'\x1b[7m{s}\x1b[0m' 586 587 hilite = highlight 588 589 def magenta(s): 590 return f'\x1b[38;2;215;0;255m{s}\x1b[0m' 591 592 def magentaback(s): 593 return f'\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m{s}\x1b[0m' 594 595 magback = magentaback 596 magbg = magentaback 597 magentabg = magentaback 598 599 def orange(s): 600 return f'\x1b[38;2;215;95;0m{s}\x1b[0m' 601 602 def orangeback(s): 603 return f'\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m{s}\x1b[0m' 604 605 orangebg = orangeback 606 orback = orangeback 607 orbg = orangeback 608 609 def purple(s): 610 return f'\x1b[38;2;135;95;255m{s}\x1b[0m' 611 612 def purpleback(s): 613 return f'\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m{s}\x1b[0m' 614 615 purback = purpleback 616 purbg = purpleback 617 purplebg = purpleback 618 619 def red(s): 620 return f'\x1b[38;2;204;0;0m{s}\x1b[0m' 621 622 def redback(s): 623 return f'\x1b[38;2;204;0;0m\x1b[38;2;238;238;238m{s}\x1b[0m' 624 625 redbg = redback 626 627 def underline(s): 628 return f'\x1b[4m{s}\x1b[0m' 629 630 underlined = underline 631 632 def message(msg, result = None): 633 print(msg, file=stderr) 634 return result 635 636 msg = message 637 638 seen = set() 639 def once(x): 640 if x in seen: 641 return None 642 seen.add(x) 643 return x 644 645 def utf8(x): 646 try: 647 if isinstance(x, str): 648 x = x.encode('utf-8') 649 return str(x, 'utf-8') 650 except Exception: 651 return None 652 653 654 cr = '\r' 655 crlf = '\r\n' 656 dquo = '"' 657 dquote = '"' 658 empty = '' 659 lcurly = '{' 660 lf = '\n' 661 rcurly = '}' 662 space = ' ' 663 squo = '\'' 664 squote = '\'' 665 tab = '\t' 666 667 nil = None 668 none = None 669 null = None 670 671 672 exec = None 673 open_utf8 = make_open_utf8(open) 674 open = open_utf8 675 676 no_input_opts = ( 677 '=', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null', 678 ) 679 modules_opts = ( 680 '-m', '--m', '-mod', '--mod', '-module', '--module', 681 '-modules', '--modules', 682 ) 683 pipe_opts = ('-p', '--p', '-pipe', '--pipe') 684 trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback') 685 686 args = argv[1:] 687 if any(seemsurl(e) for e in args): 688 from io import TextIOWrapper 689 from urllib.request import urlopen 690 691 no_input = False 692 pipe_mode = False 693 trace_errors = False 694 695 while len(args) > 0: 696 if args[0] in no_input_opts: 697 no_input = True 698 args = args[1:] 699 continue 700 701 if args[0] in pipe_opts: 702 pipe_mode = True 703 args = args[1:] 704 break 705 706 if args[0] in modules_opts: 707 try: 708 if len(args) < 2: 709 msg = 'a module name or a comma-separated list of modules' 710 raise Exception('expected ' + msg) 711 712 g = globals() 713 from importlib import import_module 714 for e in args[1].split(','): 715 g[e] = import_module(e) 716 717 g = None 718 import_module = None 719 args = args[2:] 720 except Exception as e: 721 fail(e, 1) 722 723 continue 724 725 if args[0] in trace_opts: 726 trace_errors = True 727 args = args[1:] 728 continue 729 730 break 731 732 733 try: 734 if pipe_mode: 735 if no_input: 736 raise Exception('can\'t use pipe-mode when input is disabled') 737 exprs = [compile_py(e, e, mode='eval') for e in args] 738 compile_py = None 739 handle_pipe(stdin, exprs) 740 exit(0) 741 742 expr = '.' 743 if len(args) > 0: 744 expr = args[0] 745 args = args[1:] 746 747 if expr == '.' and no_input: 748 print(info.strip(), file=stderr) 749 exit(0) 750 751 if expr == '.': 752 expr = 'line' 753 754 expr = compile_py(expr, expr, mode='eval') 755 compile_py = None 756 757 if no_input: 758 handle_no_input(expr) 759 exit(0) 760 761 if len(args) == 0: 762 handle_lines(stdin, expr) 763 exit(0) 764 765 got_stdin = False 766 all_stdin = None 767 dashes = args.count('-') 768 769 for path in args: 770 if path == '-': 771 if dashes > 1: 772 if not got_stdin: 773 all_stdin = [] 774 handle_lines(hold_lines(stdin, all_stdin), expr) 775 got_stdin = True 776 else: 777 handle_lines(all_stdin, expr) 778 else: 779 handle_lines(stdin, expr) 780 continue 781 782 if seemsurl(path): 783 with urlopen(path) as inp: 784 with TextIOWrapper(inp, encoding='utf-8') as txt: 785 handle_lines(txt, expr) 786 continue 787 788 with open_utf8(path) as txt: 789 handle_lines(txt, expr) 790 except BrokenPipeError: 791 # quit quietly, instead of showing a confusing error message 792 stderr.close() 793 exit(0) 794 except KeyboardInterrupt: 795 exit(2) 796 except Exception as e: 797 if trace_errors: 798 raise e 799 else: 800 fail(e, 1)