#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tlp [options...] [python expression] [files/URIs...]


Transform Lines with Python runs a python expression on each line of text
input, encoded as UTF-8. Carriage-returns are always ignored in lines, as
well as any UTF-8-BOM on the first line of each input.

The expression can use either `l` or `line` for the current line, and `i` as
a 0-based line counter which keeps growing even across input-sources, when
given multiple inputs.

Input-sources can be either files or web-URIs. When not given any explicit
named sources, the standard input is used. It's even possible to reuse the
standard input using multiple single dashes (-) in the order needed: stdin
is only read once in this case, and kept for later reuse.

When the expression results in None, the current input line is ignored. When
the expression results in a boolean, this determines whether the line is
emitted to the standard output, or ignored.

When the expression emits lists, tuples, or generators, each item is emitted
as its own line/result. Since empty containers emit no lines, these are the
most general type of results, acting as either filters, or input-amplifiers.


Examples

# numbers from 0 to 5, each on its own output line; no input is read/used
tlp = 'range(6)'

# all powers up to the 4th, using each input line auto-parsed into a `float`
tlp = 'range(1, 6)' | tlp '(float(l)**p for p in range(1, 4+1))'

# separate input lines with an empty line between each; global var `empty`
# can be used to avoid bothering with nested shell-quoting
tlp = 'range(6)' | tlp '["", l] if i > 0 else l'

# ignore errors/exceptions, in favor of the original lines/values
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), line)'

# ignore errors/exceptions, calling a fallback func with the exception
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), str)'

# filtering lines out via None values
head -c 1024 /dev/urandom | strings | tlp 'l if len(l) < 20 else None'

# boolean-valued results are concise ways to filter lines out
head -c 1024 /dev/urandom | strings | tlp 'len(l) < 20'

# function/callable results are automatically called on the current line
head -c 1024 /dev/urandom | strings | tlp len
'''


from itertools import islice
from json import dumps, loads
# keep the builtin `compile` reachable: the name `compile` is redefined below
# as a caching regex-compiler meant for use in expressions
compile_py = compile
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin
from time import sleep
from typing import Generator, Iterable


# result types whose items are emitted as separate output lines
_multi_types = (list, range, tuple, Generator)


def _strip_bom(text):
    '''
    Remove a single leading UTF-8 byte-order-mark from the string given.

    Both the properly-decoded form (U+FEFF) and its 3-char latin-1 mojibake
    are handled. Only an exact prefix is removed: a char-set based `lstrip`
    would also eat legitimate leading chars such as the `¿` in `¿Qué?`.
    '''

    for bom in ('\ufeff', '\xef\xbb\xbf'):
        if text.startswith(bom):
            return text[len(bom):]
    return text


def _parse_json(text):
    '''
    Try to parse the string given as JSON, returning a (value, error) pair:
    on failure the value is a `Skip` instance and the error is the exception.
    '''

    try:
        return loads(text), None
    except Exception as ex:
        return Skip(), ex


def _emit_results(res, fallback):
    '''
    Print an expression's result to stdout, flushing after each line.

    Lists, ranges, tuples, and generators have each of their items emitted on
    its own line, adapting each with a None fallback; any other result is
    adapted using the fallback given. Adapted values which are None are not
    emitted at all.
    '''

    if isinstance(res, _multi_types):
        # a local loop variable is used on purpose: the callers used to loop
        # with their global `e`, clobbering the JSON-error variable while a
        # lazy generator result could still be reading it
        for item in res:
            item = adapt_result(item, None)
            if item is not None:
                print(item, flush=True)
        return

    res = adapt_result(res, fallback)
    if res is not None:
        print(res, flush=True)


def handle_no_input(expr):
    '''
    Evaluate the compiled expression given exactly once, without reading any
    input, emitting its result(s) to stdout.
    '''

    # NOTE: the expression comes straight from the command line of the user
    # running this tool, and evaluating it is the whole point of the tool;
    # never feed it expressions from untrusted sources
    res = eval(expr)
    _emit_results(res, None)


def handle_lines(src, expr):
    '''
    Evaluate the compiled expression given for each line from the iterable
    of strings given, emitting results to stdout.

    Before each evaluation these globals are updated for the expression to
    use: `i` (0-based line counter), `l`/`line` (current line, without its
    trailing CR/LF chars), `v`/`val`/`value` (line parsed as JSON, or a `Skip`
    instance on failure), and `e`/`err`/`error` (JSON-parsing exception, or
    None on success).
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value, e, err, error

    # NOTE(review): the help message says `i` keeps growing across inputs,
    # but it's reset for each source here; the BOM check below relies on
    # that reset, so confirm the intended behavior before changing either
    i = 0
    e = err = error = None

    for l in src:
        l = l.rstrip('\r\n')
        if i == 0:
            l = _strip_bom(l)

        line = l
        value, error = _parse_json(l)
        v = val = value
        e = err = error

        res = eval(expr)
        i += 1
        _emit_results(res, line)


def handle_pipe(src, expressions):
    '''
    Run a pipeline of compiled expressions for each line from the iterable
    of strings given, emitting the last step's result(s) to stdout.

    Besides the globals `handle_lines` sets, `p` holds the previous step's
    result (starting as the current line) and `o` holds the current line.
    Steps resulting in callables are called with the previous step's result.
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value, e, err, error
    # variable names `o` and `p` work like in the `pyp` tool, except
    # the pipeline steps were given as separate cmd-line arguments
    global o, p

    i = 0
    e = err = error = None

    for l in src:
        l = l.rstrip('\r\n')
        if i == 0:
            l = _strip_bom(l)

        line = l
        o = p = prev = line
        value, error = _parse_json(l)
        v = val = value
        e = err = error

        for expr in expressions:
            p = eval(expr)
            if callable(p):
                p = p(prev)
            prev = p

        res = p
        i += 1
        _emit_results(res, line)


def hold_lines(src, lines):
    '''
    Yield each item from the iterable given, also appending it to the list
    given, so the items can be replayed later.
    '''

    for e in src:
        lines.append(e)
        yield e


def adapt_result(res, fallback):
    '''
    Turn an expression result into the string to emit, or None to emit
    nothing.

    Exceptions are raised; `Skip` instances, None, and False give None;
    callables are called with the fallback, and their result is returned
    as is; True gives the fallback; dictionaries are JSON-encoded; anything
    else is turned into a string.
    '''

    if isinstance(res, BaseException):
        raise res
    if isinstance(res, Skip) or res is None or res is False:
        return None
    if callable(res):
        return res(fallback)
    if res is True:
        return fallback
    if isinstance(res, dict):
        return dumps(res, allow_nan=False)
    return str(res)


def fail(msg, code = 1):
    'Show the message given in red on stderr, then quit with the code given.'

    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)


def make_open_utf8(open):
    '''
    Make a 1-argument read-only UTF-8 file-opener out of the `open` func
    given.
    '''

    def open_utf8_readonly(path):
        return open(path, encoding='utf-8')
    return open_utf8_readonly

def seemsurl(path):
    'Check if the string given starts with one of the supported URI schemes.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


class Skip:
    'Marker type for values which must not be emitted.'
    pass


skip = Skip()


def chunk(items, chunk_size):
    '''
    Break iterable into chunks, each with up to the item-count given.

    Strings are chunked into substrings; all other iterables are chunked
    into tuples. An exception is raised when the chunk-size isn't a positive
    integer: this is checked before handling strings too, since a chunk-size
    of 0 used to loop forever on them.
    '''

    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        # slice by offset, instead of repeatedly copying the remaining tail
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk

# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# ire_cache is like re_cache, except it's for case-insensitive regexes
ire_cache = {}

def compile(expr, flags = 0):
    '''
    Speed-up using regexes across lines, by avoiding recompilations.

    Only flags 0 and IGNORECASE are supported: an exception is raised for
    any other value.
    '''

    if flags != 0 and flags != IGNORECASE:
        msg = 'only the default and case-insensitive options are supported'
        raise Exception(msg)

    cache = re_cache if flags == 0 else ire_cache
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat

def icompile(expr):
    'Case-insensitive counterpart of the caching func `compile`.'
    return compile(expr, IGNORECASE)

def cond(*args):
    '''
    Pick the value right after the first truish condition, among the
    condition/value pairs given: a final unpaired argument acts as the
    fallback value, which is None when missing.
    '''

    if len(args) == 0:
        return None

    for i, e in enumerate(args):
        if i % 2 == 0 and i < len(args) - 1 and e:
            return args[i + 1]

    return args[-1] if len(args) % 2 == 1 else None

def dive(into, using):
    '''
    Depth-first recursive caller for 1-input functions.

    Dictionaries keep their keys, other non-string iterables become lists,
    and all other values are replaced by the func's results. The 2 arguments
    can be given in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)

def divekeys(into, using):
    '''
    Depth-first recursive renamer of dictionary keys.

    The func given is called with each dictionary key, at any depth, and
    its results are used as the new keys; non-string iterables become lists,
    and all other values are kept as they are. The 2 arguments can be given
    in either order.
    '''

    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    # the recursive helper takes only the value: this call used to pass an
    # extra leading None, which made every call fail with a TypeError
    return rec(into)

def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions.

    With a single func, it's called with each (key/index, value) pair whose
    value isn't a container, and its results replace those values. With 2
    funcs, the first one also renames dictionary keys, its results being
    turned into strings, while the second one replaces the values. The data
    can be given either first or last.
    '''

    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {k: rec(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(i, e) for i, e in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(k, e)): rec2(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(i, e) for i, e in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv

def drop(src, *what):
    '''
    Remove all the substrings given from a string, or all the keys given
    from a dictionary, or from each dictionary in an iterable: items which
    aren't dictionaries are left out of the resulting list. Other types of
    values result in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for (k, v) in src.items() if k not in what}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src if isinstance(e, dict)]

    return None

dropped = drop

def join(x, y = ' '):
    '''
    Join values into a string, or make a dict from keys and values.

    When either argument is a string, it's the separator used to join the
    items from the other argument, each turned into a string.
    '''

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))

def pick(src, *keys):
    '''
    Keep only the keys given from a dictionary, or from each dictionary in
    an iterable, using None for missing keys: items which aren't dictionaries
    are left out of the resulting list.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [
        {k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)
    ]

picked = pick

def rescue(attempt, fallback = None):
    '''
    Call the 0-input func given, returning its result: on exceptions, the
    fallback is returned instead, or the result of calling it with the
    exception, when the fallback is itself callable.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
catched = rescue
caught = rescue
recover = rescue
recovered = rescue
rescued = rescue

def retype(x):
    '''
    Try to narrow the type of the value given.

    Floats with no fractional part become integers; strings are parsed as
    JSON, then as integers, then as floats, whichever works first; anything
    else is returned as given.
    '''

    if isinstance(x, float):
        try:
            n = int(x)
        except (OverflowError, ValueError):
            # infinities and NaNs can't turn into integers
            return x
        return n if float(n) == x else x

    if not isinstance(x, str):
        return x

    try:
        return loads(x)
    except Exception:
        pass

    try:
        return int(x)
    except Exception:
        pass

    try:
        return float(x)
    except Exception:
        pass

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype

def json0(x):
    'Encode the value given as the most compact single-line JSON.'
    return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)

j0 = json0

def jsonl(x):
    '''
    Generate JSON Lines for the value given.

    None, booleans, numbers, dictionaries, and strings result in 1 line;
    other iterables result in 1 line per item; `Skip` instances result in
    no line at all; any other value is emitted as a JSON string.
    '''

    if isinstance(x, Skip):
        return

    def emit(x):
        sep = (', ', ': ')
        return dumps(x, separators=sep, allow_nan=False, indent=None)

    if x is None:
        yield emit(x)
        return

    if isinstance(x, (bool, int, float, dict, str)):
        yield emit(x)
        return

    if isinstance(x, Iterable):
        for e in x:
            if isinstance(e, Skip):
                continue
            # emit the item itself: this used to emit the whole container
            # once for each of its items
            yield emit(e)
        return

    yield emit(str(x))

jl = jsonl
jsonlines = jsonl
ndjson = jsonl

def typeof(x):
    'Get the JSON-style type-name of the value given, or `other`.'

    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof

def wait(seconds, result):
    '''
    Wait the given number of seconds, before returning its latter arg.

    The arguments are swapped when only the second one is a number.
    '''

    t = (int, float)
    if (not isinstance(seconds, t)) and isinstance(result, t):
        seconds, result = result, seconds
    sleep(seconds)
    return result

delay = wait

def after(x, what):
    'Get what follows the first match, or an empty string when not found.'
    i = x.find(what)
    return '' if i < 0 else x[i+len(what):]

def afterlast(x, what):
    'Get what follows the last match, or an empty string when not found.'
    i = x.rfind(what)
    return '' if i < 0 else x[i+len(what):]

afterfinal = afterlast

def before(x, what):
    'Get what precedes the first match, or the whole string when not found.'
    i = x.find(what)
    return x if i < 0 else x[:i]

def beforelast(x, what):
    'Get what precedes the last match, or the whole string when not found.'
    i = x.rfind(what)
    return x if i < 0 else x[:i]

beforefinal = beforelast

def since(x, what):
    'Get the string from its first match on, or an empty string.'
    i = x.find(what)
    return '' if i < 0 else x[i:]

def sincelast(x, what):
    'Get the string from its last match on, or an empty string.'
    i = x.rfind(what)
    return '' if i < 0 else x[i:]

sincefinal = sincelast

def until(x, what):
    'Get the string up to and including its first match, or all of it.'
    i = x.find(what)
    return x if i < 0 else x[:i+len(what)]

def untilfinal(x, what):
    'Get the string up to and including its last match, or all of it.'
    i = x.rfind(what)
    return x if i < 0 else x[:i+len(what)]

untillast = untilfinal

# the funcs below ANSI-style their argument: the `*back` ones set a colored
# background (SGR code 48) along with a light foreground (SGR code 38)

def blue(s):
    return f'\x1b[38;2;0;95;215m{s}\x1b[0m'

def blueback(s):
    return f'\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m{s}\x1b[0m'

bluebg = blueback

def bold(s):
    return f'\x1b[1m{s}\x1b[0m'

bolded = bold

def gbm(s, good = False, bad = False, meh = False):
    '''
    Good, Bad, Meh ANSI-styles a plain string via ANSI-style sequences,
    according to 1..3 conditions given as boolean(ish) values: these are
    checked in order, so the first truish one wins.
    '''

    if good:
        return green(s)
    if bad:
        return red(s)
    if meh:
        return gray(s)
    return s

def gray(s):
    return f'\x1b[38;2;168;168;168m{s}\x1b[0m'

def grayback(s):
    return f'\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m{s}\x1b[0m'

graybg = grayback

def green(s):
    return f'\x1b[38;2;0;135;95m{s}\x1b[0m'

def greenback(s):
    return f'\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m{s}\x1b[0m'

greenbg = greenback

def highlight(s):
    return f'\x1b[7m{s}\x1b[0m'

hilite = highlight

def magenta(s):
    return f'\x1b[38;2;215;0;255m{s}\x1b[0m'

def magentaback(s):
    return f'\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

magback = magentaback
magbg = magentaback
magentabg = magentaback

def orange(s):
    return f'\x1b[38;2;215;95;0m{s}\x1b[0m'

def orangeback(s):
    return f'\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

orangebg = orangeback
orback = orangeback
orbg = orangeback

def purple(s):
    return f'\x1b[38;2;135;95;255m{s}\x1b[0m'

def purpleback(s):
    return f'\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

purback = purpleback
purbg = purpleback
purplebg = purpleback

def red(s):
    return f'\x1b[38;2;204;0;0m{s}\x1b[0m'

def redback(s):
    # the background code (48) is used, like all other `*back` funcs do:
    # this used to set the foreground (38) twice, leaving no red background
    return f'\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

redbg = redback

def underline(s):
    return f'\x1b[4m{s}\x1b[0m'

underlined = underline

def message(msg, result = None):
    'Show the message given on stderr, then return the latter argument.'
    print(msg, file=stderr)
    return result

msg = message

# seen remembers all values func `once` was given so far
seen = set()
def once(x):
    'Return the value given the first time it is seen, and None afterwards.'
    if x in seen:
        return None
    seen.add(x)
    return x

def utf8(x):
    '''
    Turn the string or bytes given into a string via UTF-8, resulting in
    None when that fails.
    '''

    try:
        if isinstance(x, str):
            x = x.encode('utf-8')
        return str(x, 'utf-8')
    except Exception:
        return None


# named constants, which help expressions avoid nested shell-quoting
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
space = ' '
squo = '\''
squote = '\''
tab = '\t'

nil = None
none = None
null = None


# expressions can't use `exec`, and can only open files as read-only UTF-8
exec = None
open_utf8 = make_open_utf8(open)
open = open_utf8

no_input_opts = (
    '=', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')


# the command-line app only runs when this file is run as a script, which
# keeps all the funcs above importable; everything below deliberately stays
# at module level, since expressions are evaluated against module globals
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)

    args = argv[1:]
    if any(seemsurl(e) for e in args):
        # only pay for these imports when any input seems to be a URI
        from io import TextIOWrapper
        from urllib.request import urlopen

    no_input = False
    pipe_mode = False
    trace_errors = False

    # handle leading options
    while len(args) > 0:
        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            # all remaining arguments are pipeline steps
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    # the message is built inline, to avoid clobbering
                    # global `msg`, which is an alias for func `message`
                    raise Exception(
                        'expected a module name or a comma-separated '
                        'list of modules'
                    )

                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)

                # hide these names from expressions
                g = None
                import_module = None
                args = args[2:]
            except Exception as e:
                fail(e, 1)

            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        break

    try:
        if pipe_mode:
            if no_input:
                raise Exception('can\'t use pipe-mode when input is disabled')
            exprs = [compile_py(e, e, mode='eval') for e in args]
            compile_py = None
            handle_pipe(stdin, exprs)
            exit(0)

        expr = '.'
        if len(args) > 0:
            expr = args[0]
            args = args[1:]

        if expr == '.' and no_input:
            print(info.strip(), file=stderr)
            exit(0)

        if expr == '.':
            expr = 'line'

        expr = compile_py(expr, expr, mode='eval')
        compile_py = None

        if no_input:
            handle_no_input(expr)
            exit(0)

        if len(args) == 0:
            handle_lines(stdin, expr)
            exit(0)

        got_stdin = False
        all_stdin = None
        dashes = args.count('-')

        for path in args:
            if path == '-':
                if dashes > 1:
                    # stdin is read only once, and replayed when reused
                    if not got_stdin:
                        all_stdin = []
                        handle_lines(hold_lines(stdin, all_stdin), expr)
                        got_stdin = True
                    else:
                        handle_lines(all_stdin, expr)
                else:
                    handle_lines(stdin, expr)
                continue

            if seemsurl(path):
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        handle_lines(txt, expr)
                continue

            with open_utf8(path) as txt:
                handle_lines(txt, expr)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise
        fail(e, 1)