#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tlp [options...] [python expression] [files/URIs...]


Transform Lines with Python runs a python expression on each line of text
input, encoded as UTF-8. Carriage-returns are always ignored in lines, as
well as any UTF-8-BOM on the first line of each input.

The expression can use either `l` or `line` for the current line, and `i` as
a 0-based line counter which keeps growing even across input-sources, when
given multiple inputs.

Input-sources can be either files or web-URIs. When not given any explicit
named sources, the standard input is used. It's even possible to reuse the
standard input using multiple single dashes (-) in the order needed: stdin
is only read once in this case, and kept for later reuse.

When the expression results in None, the current input line is ignored. When
the expression results in a boolean, this determines whether the line is
emitted to the standard output, or ignored.

When the expression emits lists, tuples, or generators, each item is emitted
as its own line/result. Since empty containers emit no lines, these are the
most general type of results, acting as either filters, or input-amplifiers.


Examples

# numbers from 0 to 5, each on its own output line; no input is read/used
tlp = 'range(6)'

# all powers up to the 4th, using each input line auto-parsed into a `float`
tlp = 'range(1, 6)' | tlp '(float(l)**p for p in range(1, 4+1))'

# separate input lines with an empty line between each; global var `empty`
# can be used to avoid bothering with nested shell-quoting
tlp = 'range(6)' | tlp '["", l] if i > 0 else l'

# ignore errors/exceptions, in favor of the original lines/values
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), line)'

# ignore errors/exceptions, calling a fallback func with the exception
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), str)'

# filtering lines out via None values
head -c 1024 /dev/urandom | strings | tlp 'l if len(l) < 20 else None'

# boolean-valued results are concise ways to filter lines out
head -c 1024 /dev/urandom | strings | tlp 'len(l) < 20'

# function/callable results are automatically called on the current line
head -c 1024 /dev/urandom | strings | tlp len
'''


from itertools import islice
from json import dumps, loads
# keep a handle on the builtin, since func `compile` below shadows it
compile_py = compile
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin
from time import sleep
from typing import Generator, Iterable


# the script only acts as a command-line tool when run directly, which lets
# its helper funcs be imported without reading args or exiting
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)


# i is the 0-based line counter available to expressions: it's never reset,
# so it keeps growing across input-sources, as the help message promises
i = 0

# byte-order marks to ignore at the start of inputs: the first is a properly
# decoded UTF-8 BOM, the second is what its 3 bytes look like when the input
# was decoded byte-by-byte instead
_boms = ('\ufeff', '\xef\xbb\xbf')


def _clean_line(text, first):
    'Drop trailing CRs/LFs, and any leading BOM when on a first line.'

    text = text.rstrip('\r\n')
    if first:
        # only whole BOM prefixes are removed: str.lstrip would treat its
        # arg as a set of chars, eating legitimate leading symbols like `¿`
        for bom in _boms:
            if text.startswith(bom):
                return text[len(bom):]
    return text


def _parse_json(text):
    'Try to parse JSON text, returning a (value, error) pair.'

    try:
        return loads(text), None
    except Exception as ex:
        return Skip(), ex


def _emit_result(res, fallback):
    '''
    Show an expression result on stdout: lists, ranges, tuples, and
    generators emit each of their items as its own line, while all other
    values emit a single line, or none at all when adapt_result skips them.
    The fallback is only used for single values: items always get None.
    '''

    if isinstance(res, (list, range, tuple, Generator)):
        # the loop variable is deliberately local, so lazy generators which
        # use global names such as `e` keep seeing their intended values
        for item in res:
            item = adapt_result(item, None)
            if item is not None:
                print(item, flush=True)
        return

    res = adapt_result(res, fallback)
    if res is not None:
        print(res, flush=True)


def handle_no_input(expr):
    'Evaluate the expression just once, without reading any input.'
    _emit_result(eval(expr), None)


def handle_lines(src, expr):
    '''
    Evaluate the expression for each line from the iterable given, emitting
    its results. Expressions can use `l`/`line` (current line), `i` (line
    counter), `v`/`val`/`value` (line parsed as JSON, or a Skip value on
    failure), and `e`/`err`/`error` (the JSON-parsing exception, or None).
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value, e, err, error

    first = True
    for raw in src:
        l = line = _clean_line(raw, first)
        first = False

        parsed, problem = _parse_json(l)
        v = val = value = parsed
        e = err = error = problem

        res = eval(expr)
        i += 1
        _emit_result(res, line)


def handle_pipe(src, expressions):
    '''
    Like handle_lines, except each line goes through a pipeline of
    expressions, where each step can use `p` for the previous step's result
    and `o` for the original line; callable results are called on the
    previous result right away.
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value, e, err, error
    # variable names `o` and `p` work like in the `pyp` tool, except
    # the pipeline steps were given as separate cmd-line arguments
    global o, p

    first = True
    for raw in src:
        l = line = _clean_line(raw, first)
        first = False
        o = p = prev = line

        parsed, problem = _parse_json(l)
        v = val = value = parsed
        e = err = error = problem

        for expr in expressions:
            p = eval(expr)
            if callable(p):
                p = p(prev)
            prev = p

        i += 1
        _emit_result(p, line)


def hold_lines(src, lines):
    'Yield all items from the source, while also remembering them in a list.'

    for e in src:
        lines.append(e)
        yield e


def adapt_result(res, fallback):
    '''
    Turn an expression result into what to show, or None to show nothing:
    exceptions are raised, Skip/None/False values are ignored, callables
    are called with the fallback, True stands for the fallback itself, dicts
    become JSON, and anything else is turned into a string.
    '''

    if isinstance(res, BaseException):
        raise res
    if isinstance(res, Skip) or res is None or res is False:
        return None
    if callable(res):
        return res(fallback)
    if res is True:
        return fallback
    if isinstance(res, dict):
        return dumps(res, allow_nan=False)
    return str(res)


def fail(msg, code = 1):
    'Show the message given in red on stderr, then quit with the code given.'

    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)


def make_open_utf8(open):
    'Make a read-only UTF-8 file-opener out of the opener func given.'

    def open_utf8_readonly(path):
        return open(path, encoding='utf-8')
    return open_utf8_readonly

def seemsurl(path):
    'Check if a string starts with any of the URI protocols supported.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


class Skip:
    'Type for values which mean `emit nothing`.'
    pass


skip = Skip()


def chunk(items, chunk_size):
    'Break iterable into chunks, each with up to the item-count given.'

    # validate before anything else: a non-positive size used to make
    # string inputs loop forever
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk

# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# ire_cache is like re_cache, except it's for case-insensitive regexes
ire_cache = {}

def compile(expr, flags = 0):
    'Speed-up using regexes across lines, by avoiding recompilations.'

    if flags != 0 and flags != IGNORECASE:
        msg = 'only the default and case-insensitive options are supported'
        raise Exception(msg)

    cache = re_cache if flags == 0 else ire_cache
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat

def icompile(expr):
    'Like func compile, but for case-insensitive regexes.'
    return compile(expr, IGNORECASE)

def cond(*args):
    '''
    Pick the value right after the first truish condition, among the
    condition/value pairs given; a trailing unpaired arg is the fallback,
    which is None when missing.
    '''

    if len(args) == 0:
        return None

    for k in range(0, len(args) - 1, 2):
        if args[k]:
            return args[k + 1]

    return args[-1] if len(args) % 2 == 1 else None

def dive(into, using):
    'Depth-first recursive caller for 1-input functions.'

    # allow giving the func and the data in either order
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return using(v)

    return rec(into)

def divekeys(into, using):
    'Depth-first recursive caller for 1-input funcs which rename dict keys.'

    # allow giving the func and the data in either order
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(e) for e in v]
        return v

    return rec(into)

def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions, which are given
    keys/indices along with values. When given 2 funcs, the first one
    renames dict keys, while the second one transforms the leaf values.
    '''

    # allow giving the data either before or after the func(s)
    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {k: rec(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(j, e) for j, e in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(k, e)): rec2(k, e) for k, e in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(j, e) for j, e in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv

def drop(src, *what):
    '''
    Remove all substrings given from a string, or all keys given from a
    dict, or from each dict in an iterable, ignoring its non-dict items.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for (k, v) in src.items() if not (k in what)}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src if isinstance(e, dict)]

    return None

dropped = drop

def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return dict(zip(x, y))

def pick(src, *keys):
    '''
    Keep only the keys given from a dict, or from each dict in an iterable,
    ignoring its non-dict items; missing keys are given None values.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [{k: e.get(k, None) for k in keys} for e in src if isinstance(e, dict)]

picked = pick

def rescue(attempt, fallback = None):
    '''
    Call the zero-input func given, handling exceptions via the fallback,
    which is either a value to use as is, or a func to call with the
    exception caught.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
recover = rescue
rescued = rescue

def retype(x):
    'Try to narrow the type of the value given.'

    if isinstance(x, float):
        try:
            n = int(x)
        except (OverflowError, ValueError):
            # infinities and NaNs can't turn into integers
            return x
        return n if float(n) == x else x

    if not isinstance(x, str):
        return x

    try:
        return loads(x)
    except Exception:
        pass

    try:
        return int(x)
    except Exception:
        pass

    try:
        return float(x)
    except Exception:
        pass

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype

def json0(x):
    'Turn a value into the most compact single-line JSON possible.'
    return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)

j0 = json0

def jsonl(x):
    '''
    Emit JSON Lines: non-string iterables other than dicts emit a line for
    each of their items, while other values emit a single line; Skip values
    emit nothing.
    '''

    if isinstance(x, Skip):
        return

    def emit(x):
        sep = (', ', ': ')
        return dumps(x, separators=sep, allow_nan=False, indent=None)

    if x is None:
        yield emit(x)
        return

    if isinstance(x, (bool, int, float, dict, str)):
        yield emit(x)
        return

    if isinstance(x, Iterable):
        for e in x:
            if isinstance(e, Skip):
                continue
            # each item gets its own line, instead of the whole container
            yield emit(e)
        return

    yield emit(str(x))

jl = jsonl
jsonlines = jsonl
ndjson = jsonl

def typeof(x):
    'Name the type of a value, the way JSON/JavaScript would.'

    # return str(type(x))
    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof

def wait(seconds, result):
    'Wait the given number of seconds, before returning its latter arg.'

    # allow giving the args in either order, when only one is a number
    t = (int, float)
    if (not isinstance(seconds, t)) and isinstance(result, t):
        seconds, result = result, seconds
    sleep(seconds)
    return result

delay = wait

def after(x, what):
    'Get what follows the first match: no match results in an empty string.'
    i = x.find(what)
    return '' if i < 0 else x[i+len(what):]

def afterlast(x, what):
    'Get what follows the last match: no match results in an empty string.'
    i = x.rfind(what)
    return '' if i < 0 else x[i+len(what):]

afterfinal = afterlast

def before(x, what):
    'Get what precedes the first match: no match results in the whole string.'
    i = x.find(what)
    return x if i < 0 else x[:i]

def beforelast(x, what):
    'Get what precedes the last match: no match results in the whole string.'
    i = x.rfind(what)
    return x if i < 0 else x[:i]

beforefinal = beforelast

def since(x, what):
    'Get the string from its first match: no match results in an empty string.'
    i = x.find(what)
    return '' if i < 0 else x[i:]

def sincelast(x, what):
    'Get the string from its last match: no match results in an empty string.'
    i = x.rfind(what)
    return '' if i < 0 else x[i:]

sincefinal = sincelast

def until(x, what):
    'Get the string up to the end of its first match, or the whole string.'
    i = x.find(what)
    return x if i < 0 else x[:i+len(what)]

def untilfinal(x, what):
    'Get the string up to the end of its last match, or the whole string.'
    i = x.rfind(what)
    return x if i < 0 else x[:i+len(what)]

untillast = untilfinal

# the funcs below ANSI-style values: the ones named after colors use
# 24-bit-color codes, where 38;2;r;g;b styles the foreground, and
# 48;2;r;g;b styles the background

def blue(s):
    return f'\x1b[38;2;0;95;215m{s}\x1b[0m'

def blueback(s):
    return f'\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m{s}\x1b[0m'

bluebg = blueback

def bold(s):
    return f'\x1b[1m{s}\x1b[0m'

bolded = bold

def gbm(s, good = False, bad = False, meh = False):
    '''
    Good, Bad, Meh ANSI-styles a plain string via ANSI-style sequences,
    according to 1..3 conditions given as boolean(ish) values: these are
    checked in order, so the first truish one wins.
    '''

    if good:
        return green(s)
    if bad:
        return red(s)
    if meh:
        return gray(s)
    return s

def gray(s):
    return f'\x1b[38;2;168;168;168m{s}\x1b[0m'

def grayback(s):
    return f'\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m{s}\x1b[0m'

graybg = grayback

def green(s):
    return f'\x1b[38;2;0;135;95m{s}\x1b[0m'

def greenback(s):
    return f'\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m{s}\x1b[0m'

greenbg = greenback

def highlight(s):
    return f'\x1b[7m{s}\x1b[0m'

hilite = highlight

def magenta(s):
    return f'\x1b[38;2;215;0;255m{s}\x1b[0m'

def magentaback(s):
    return f'\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

magback = magentaback
magbg = magentaback
magentabg = magentaback

def orange(s):
    return f'\x1b[38;2;215;95;0m{s}\x1b[0m'

def orangeback(s):
    return f'\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

orangebg = orangeback
orback = orangeback
orbg = orangeback

def purple(s):
    return f'\x1b[38;2;135;95;255m{s}\x1b[0m'

def purpleback(s):
    return f'\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

purback = purpleback
purbg = purpleback
purplebg = purpleback

def red(s):
    return f'\x1b[38;2;204;0;0m{s}\x1b[0m'

def redback(s):
    # code 48 styles the background, like all other `back` funcs do
    return f'\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

redbg = redback

def underline(s):
    return f'\x1b[4m{s}\x1b[0m'

underlined = underline

def message(msg, result = None):
    'Show a message on stderr, then return the other arg given, if any.'
    print(msg, file=stderr)
    return result

msg = message

# seen remembers all values given to func once
seen = set()
def once(x):
    'Return the value given only the first time it shows up, or None.'
    if x in seen:
        return None
    seen.add(x)
    return x

def utf8(x):
    'Ensure the value given is valid UTF-8 text, or return None on failure.'
    try:
        if isinstance(x, str):
            x = x.encode('utf-8')
        return str(x, 'utf-8')
    except Exception:
        return None


# names for values which are awkward to shell-quote in expressions
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
space = ' '
squo = '\''
squote = '\''
tab = '\t'

nil = None
none = None
null = None


open_utf8 = make_open_utf8(open)

no_input_opts = (
    '=', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')


def _import_modules(names):
    'Make all modules in the comma-separated list given available globally.'

    # importing locally keeps func import_module unavailable to expressions
    from importlib import import_module
    g = globals()
    for name in names.split(','):
        name = name.strip()
        if name:
            g[name] = import_module(name)


if __name__ == '__main__':
    # keep expressions from running arbitrary statements, and from opening
    # files in any way other than as read-only UTF-8 text
    exec = None
    open = open_utf8

    args = argv[1:]
    if any(seemsurl(a) for a in args):
        from io import TextIOWrapper
        from urllib.request import urlopen

    no_input = False
    pipe_mode = False
    trace_errors = False

    # handle leading options, stopping at the first non-option argument
    while len(args) > 0:
        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            # everything after a pipe option is a pipeline step
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    what = 'a module name or a comma-separated list of modules'
                    raise Exception('expected ' + what)
                _import_modules(args[1])
                args = args[2:]
            except Exception as ex:
                fail(ex, 1)

            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        break

    try:
        if pipe_mode:
            if no_input:
                raise Exception('can\'t use pipe-mode when input is disabled')
            exprs = [compile_py(e, e, mode='eval') for e in args]
            # keep expressions from compiling arbitrary code
            compile_py = None
            handle_pipe(stdin, exprs)
            exit(0)

        expr = '.'
        if len(args) > 0:
            expr = args[0]
            args = args[1:]

        if expr == '.' and no_input:
            print(info.strip(), file=stderr)
            exit(0)

        # a single dot stands for the current line, unchanged
        if expr == '.':
            expr = 'line'

        expr = compile_py(expr, expr, mode='eval')
        # keep expressions from compiling arbitrary code
        compile_py = None

        if no_input:
            handle_no_input(expr)
            exit(0)

        if len(args) == 0:
            handle_lines(stdin, expr)
            exit(0)

        got_stdin = False
        all_stdin = None
        dashes = args.count('-')

        for path in args:
            if path == '-':
                if dashes > 1:
                    # stdin is only read once, remembering its lines for
                    # all later dashes
                    if not got_stdin:
                        all_stdin = []
                        handle_lines(hold_lines(stdin, all_stdin), expr)
                        got_stdin = True
                    else:
                        handle_lines(all_stdin, expr)
                else:
                    handle_lines(stdin, expr)
                continue

            if seemsurl(path):
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        handle_lines(txt, expr)
                continue

            with open_utf8(path) as txt:
                handle_lines(txt, expr)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise e
        else:
            fail(e, 1)