#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tlp [options...] [python expression] [files/URIs...]


Transform Lines with Python runs a python expression on each line of text
input, encoded as UTF-8. Carriage-returns are always ignored in lines, as
well as any UTF-8-BOM on the first line of each input.

The expression can use either `l` or `line` for the current line, and `i` as
a 0-based line counter, which keeps growing, even across input-sources, when
given more than one.

Input-sources can be either files or web-URIs. When not given any explicit
named sources, the standard input is used. It's even possible to reuse the
standard input using multiple single dashes (-) in the order needed: stdin
is only read once in this case, and kept for later reuse.

When the expression results in None, the current input line is ignored. When
the expression results in a boolean, this determines whether the line is
emitted to the standard output, or ignored.

When the expression emits lists, tuples, or generators, each item is emitted
as its own line/result. Since empty containers emit no lines, these are the
most general type of results, acting as either filters, or input-amplifiers.


Examples

# numbers from 0 to 5, each on its own output line; no input is read/used
tlp = 'range(6)'

# all powers up to the 4th, using each input line auto-parsed into a `float`
tlp = 'range(1, 6)' | tlp '(float(l)**p for p in range(1, 4+1))'

# separate input lines with an empty line between each; global var `empty`
# can be used to avoid bothering with nested shell-quoting
tlp = 'range(6)' | tlp '["", l] if i > 0 else l'

# ignore errors/exceptions, in favor of the original lines/values
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), line)'

# ignore errors/exceptions, calling a fallback func with the exception
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), str)'

# filtering lines out via None values
head -c 1024 /dev/urandom | strings | tlp 'l if len(l) < 20 else None'

# boolean-valued results are concise ways to filter lines out
head -c 1024 /dev/urandom | strings | tlp 'len(l) < 20'

# function/callable results are automatically called on the current line
head -c 1024 /dev/urandom | strings | tlp len
'''


from itertools import islice
from json import dumps, loads
# keep the builtin `compile` reachable, since a regex-caching func with the
# same name is defined later
compile_py = compile
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin
from time import sleep
from typing import Generator, Iterable


# show the help message and quit right away, before defining anything else;
# the main-guard keeps the script's behavior, while making it importable
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)


# i is the 0-based line counter available to expressions: the line-handling
# funcs never reset it, so it keeps growing across input-sources
i = 0


def _clean_line(s, first):
    'Drop trailing CR/LF bytes, and the leading BOM of first lines.'

    s = s.rstrip('\r\n')
    if first:
        # a BOM is either 1 codepoint when properly decoded as UTF-8, or 3
        # when its bytes were decoded 1-to-1: only an exact prefix is removed
        for bom in ('\ufeff', '\xef\xbb\xbf'):
            if s.startswith(bom):
                return s[len(bom):]
    return s


def _load_value(s):
    'Try to parse a line as JSON, returning a (value, error) pair.'

    try:
        return loads(s), None
    except Exception as ex:
        return Skip(), ex


def _emit(res, fallback):
    '''
    Show a result on stdout: items from lists, ranges, tuples, and generators
    each get their own line, except for Skip values; any other result goes
    through func adapt_result, and is shown unless that gives None.
    '''

    if isinstance(res, (list, range, tuple, Generator)):
        # the loop variable is a local, so the global error variable `e`
        # isn't clobbered while lazy generators may still be using it
        for item in res:
            if not isinstance(item, Skip):
                print(item, flush=True)
        return

    res = adapt_result(res, fallback)
    if res is not None:
        print(res, flush=True)


def handle_no_input(expr):
    'Evaluate the expression given just once, without reading any input.'

    _emit(eval(expr), None)


def handle_lines(src, expr):
    '''
    Evaluate the expression given for each line from the source given. The
    expression can use globals `l`/`line` (current line), `i` (line counter),
    `v`/`val`/`value` (line parsed as JSON, or a Skip value on failure), and
    `e`/`err`/`error` (the JSON-parsing exception, or None).
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value, e, err, error

    e = err = error = None
    first = True

    for l in src:
        l = _clean_line(l, first)
        first = False

        line = l
        v, e = _load_value(l)
        val = value = v
        err = error = e

        # the counter is updated only after emitting results, so lazy
        # generators see the same `i` the expression itself saw
        _emit(eval(expr), line)
        i += 1


def handle_pipe(src, expressions):
    '''
    Evaluate a chain of expressions for each line from the source given:
    each step can use `p` for the previous step's result, and `o` for the
    original line; callable results are called on the previous result.
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value, e, err, error
    # variable names `o` and `p` work like in the `pyp` tool, except
    # the pipeline steps were given as separate cmd-line arguments
    global o, p

    e = err = error = None
    first = True

    for l in src:
        l = _clean_line(l, first)
        first = False

        line = l
        o = p = prev = line
        v, e = _load_value(l)
        val = value = v
        err = error = e

        for expr in expressions:
            p = eval(expr)
            if callable(p):
                p = p(prev)
            prev = p

        # the counter is updated only after emitting results, so lazy
        # generators see the same `i` the expressions themselves saw
        _emit(p, line)
        i += 1


def hold_lines(src, lines):
    'Pass lines through, while also remembering them in the list given.'

    for e in src:
        lines.append(e)
        yield e


def adapt_result(res, fallback):
    '''
    Turn a non-container result into what to show, or None to show nothing:
    exceptions are raised, Skip/None/False show nothing, True stands for the
    fallback, callables are called on the fallback, dictionaries become JSON,
    and anything else is turned into a string.
    '''

    if isinstance(res, BaseException):
        raise res
    # skipped values must never be shown, not even as object descriptions
    if isinstance(res, Skip) or res is None or res is False:
        return None
    if callable(res):
        res = res(fallback)
        return None if isinstance(res, Skip) else res
    if res is True:
        return fallback
    if isinstance(res, dict):
        return dumps(res, allow_nan=False)
    return str(res)


class Skip:
    'Marker type for values/results which must not be emitted.'
    pass


skip = Skip()


def chunk(items, chunk_size):
    'Break iterable into chunks, each with up to the item-count given.'

    # check the size before handling strings too: non-positive sizes used
    # to make string-chunking emit empty strings forever
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk


# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# ire_cache is like re_cache, except it's for case-insensitive regexes
ire_cache = {}


def compile(expr, flags = 0):
    'Speed-up using regexes across lines, by avoiding recompilations.'

    if flags != 0 and flags != IGNORECASE:
        msg = 'only the default and case-insensitive options are supported'
        raise Exception(msg)

    cache = re_cache if flags == 0 else ire_cache
    pat = cache.get(expr)
    if pat is None:
        pat = compile_uncached(expr, flags)
        cache[expr] = pat
    return pat


def icompile(expr):
    'Case-insensitive variant of the regex-caching func compile.'
    return compile(expr, IGNORECASE)


def cond(*args):
    '''
    Pick the result following the first truish condition, among the pairs
    of conditions/results given: a trailing unpaired argument is the final
    fallback, which is None when missing.
    '''

    for test, result in zip(args[0::2], args[1::2]):
        if test:
            return result
    return args[-1] if len(args) % 2 == 1 else None


def dive(into, using):
    'Depth-first recursive caller for 1-input functions.'

    # allow giving the arguments in either order
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(x) for x in v]
        return using(v)

    return rec(into)


def divekeys(into, using):
    'Depth-first recursive caller for 1-input funcs which rename dict keys.'

    # allow giving the arguments in either order
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(x) for x in v]
        return v

    # rec takes only the value to transform: it used to be called with an
    # extra leading argument, which always failed
    return rec(into)


def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions, which are given
    keys/indices and values: when given 2 funcs, the first one renames the
    dictionary keys, while the second one transforms the leaf values.
    '''

    # allow giving the data either before or after the func(s)
    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {kk: rec(kk, x) for kk, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(n, x) for n, x in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(kk, x)): rec2(kk, x) for kk, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(n, x) for n, x in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv


def drop(src, *what):
    '''
    Remove all the substrings given from a string, or all the keys given
    from a dictionary, or from each dictionary in an iterable; any other
    kind of input results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for k, v in src.items() if not (k in what)}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src]

    return None

dropped = drop


def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return {k: v for k, v in zip(x, y)}


def pick(src, *keys):
    '''
    Keep only the keys given from a dictionary, or from each dictionary in
    an iterable: missing keys are given None values.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [{k: e.get(k, None) for k in keys} for e in src]


def rescue(attempt, fallback = None):
    '''
    Call the 0-input func given, returning its result: on exceptions, the
    fallback is the result instead, or the result of calling it with the
    exception, when it's callable.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
catched = rescue
caught = rescue
recover = rescue
recovered = rescue
rescued = rescue


def retype(x):
    'Try to narrow the type of the value given.'

    if isinstance(x, float):
        # infinities and NaNs can't turn into integers: func is_integer is
        # false for those, which keeps them as they are
        return int(x) if x.is_integer() else x

    if not isinstance(x, str):
        return x

    try:
        return loads(x)
    except Exception:
        pass

    try:
        return int(x)
    except Exception:
        pass

    try:
        return float(x)
    except Exception:
        pass

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype


def json0(x):
    'Turn the value given into JSON with no spaces around separators.'
    return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)

j0 = json0

def jsonl(x):
    '''
    Turn the value given into JSON Lines: items from non-string iterables
    other than dictionaries each get their own single-line JSON string,
    ignoring Skip values.
    '''

    if isinstance(x, Skip):
        return

    def emit(x):
        sep = (', ', ': ')
        return dumps(x, separators=sep, allow_nan=False, indent=None)

    if x is None or isinstance(x, (bool, int, float, dict, str)):
        yield emit(x)
        return

    if isinstance(x, Iterable):
        for e in x:
            if isinstance(e, Skip):
                continue
            # emit the current item, not the whole container each time
            yield emit(e)
        return

    yield emit(str(x))

jl = jsonl
jsonlines = jsonl
ndjson = jsonl


def typeof(x):
    'Name the JSON-style type of the value given, or `other` when unknown.'

    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof


def wait(seconds, result):
    'Wait the given number of seconds, before returning its latter arg.'

    # allow giving the arguments in either order, when only one is a number
    t = (int, float)
    if (not isinstance(seconds, t)) and isinstance(result, t):
        seconds, result = result, seconds
    sleep(seconds)
    return result

delay = wait


def after(x, what):
    'Get what follows the first match: no match results in an empty string.'
    i = x.find(what)
    return '' if i < 0 else x[i+len(what):]

def afterlast(x, what):
    'Get what follows the last match: no match results in an empty string.'
    i = x.rfind(what)
    return '' if i < 0 else x[i+len(what):]

afterfinal = afterlast

def before(x, what):
    'Get what precedes the first match: no match results in the whole string.'
    i = x.find(what)
    return x if i < 0 else x[:i]

def beforelast(x, what):
    'Get what precedes the last match: no match results in the whole string.'
    i = x.rfind(what)
    return x if i < 0 else x[:i]

beforefinal = beforelast

def since(x, what):
    'Get all from the first match on: no match results in an empty string.'
    i = x.find(what)
    return '' if i < 0 else x[i:]

def sincelast(x, what):
    'Get all from the last match on: no match results in an empty string.'
    i = x.rfind(what)
    return '' if i < 0 else x[i:]

sincefinal = sincelast

def until(x, what):
    'Get all up to the first match, included: no match keeps the string.'
    i = x.find(what)
    return x if i < 0 else x[:i+len(what)]

def untilfinal(x, what):
    'Get all up to the last match, included: no match keeps the string.'
    i = x.rfind(what)
    return x if i < 0 else x[:i+len(what)]

untillast = untilfinal


def blue(s):
    'ANSI-style the value given using a blue foreground.'
    return f'\x1b[38;2;0;95;215m{s}\x1b[0m'

def blueback(s):
    'ANSI-style the value given using a blue background.'
    return f'\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m{s}\x1b[0m'

bluebg = blueback

def bold(s):
    'ANSI-style the value given as bold.'
    return f'\x1b[1m{s}\x1b[0m'

bolded = bold

def gbm(s, good = False, bad = False, meh = False):
    '''
    Good, Bad, Meh ANSI-styles a plain string via ANSI-style sequences,
    according to 1..3 conditions given as boolean(ish) values: these are
    checked in order, so the first truish one wins.
    '''

    if good:
        return green(s)
    if bad:
        return red(s)
    if meh:
        return gray(s)
    return s

def gray(s):
    'ANSI-style the value given using a gray foreground.'
    return f'\x1b[38;2;168;168;168m{s}\x1b[0m'

def grayback(s):
    'ANSI-style the value given using a gray background.'
    return f'\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m{s}\x1b[0m'

graybg = grayback

def green(s):
    'ANSI-style the value given using a green foreground.'
    return f'\x1b[38;2;0;135;95m{s}\x1b[0m'

def greenback(s):
    'ANSI-style the value given using a green background.'
    return f'\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m{s}\x1b[0m'

greenbg = greenback

def highlight(s):
    'ANSI-style the value given by swapping foreground and background.'
    return f'\x1b[7m{s}\x1b[0m'

hilite = highlight

def magenta(s):
    'ANSI-style the value given using a magenta foreground.'
    return f'\x1b[38;2;215;0;255m{s}\x1b[0m'

def magentaback(s):
    'ANSI-style the value given using a magenta background.'
    return f'\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

magback = magentaback
magbg = magentaback
magentabg = magentaback

def orange(s):
    'ANSI-style the value given using an orange foreground.'
    return f'\x1b[38;2;215;95;0m{s}\x1b[0m'

def orangeback(s):
    'ANSI-style the value given using an orange background.'
    return f'\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

orangebg = orangeback
orback = orangeback
orbg = orangeback

def purple(s):
    'ANSI-style the value given using a purple foreground.'
    return f'\x1b[38;2;135;95;255m{s}\x1b[0m'

def purpleback(s):
    'ANSI-style the value given using a purple background.'
    return f'\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

purback = purpleback
purbg = purpleback
purplebg = purpleback

def red(s):
    'ANSI-style the value given using a red foreground.'
    return f'\x1b[38;2;204;0;0m{s}\x1b[0m'

def redback(s):
    'ANSI-style the value given using a red background.'
    # code 48 sets the background, like all other background-stylers do:
    # this func used to set the foreground twice instead
    return f'\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

redbg = redback

def underline(s):
    'ANSI-style the value given as underlined.'
    return f'\x1b[4m{s}\x1b[0m'

underlined = underline



def fail(msg, code = 1):
    'Show the error message given on stderr, then quit with the code given.'
    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)


def make_open_utf8(open):
    'Make a 1-input variant of the func given, which always uses UTF-8.'

    def open_utf8_readonly(path):
        return open(path, encoding='utf-8')
    return open_utf8_readonly


def message(msg, result = None):
    'Show the message given on stderr, returning the result given.'
    print(msg, file=stderr)
    return result

msg = message


def seemsurl(path):
    'Check if the string given starts with one of the URI schemes supported.'
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
space = ' '
squo = '\''
squote = '\''
tab = '\t'

nil = None
none = None
null = None


# expressions can neither run arbitrary statements, nor open files using
# anything but UTF-8 in read-only mode
exec = None
open_utf8 = make_open_utf8(open)
open = open_utf8


# everything is kept at module level even when guarded, since expressions
# are evaluated using the module's global variables
if __name__ == '__main__':
    no_input_opts = (
        '=', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
    )
    modules_opts = (
        '-m', '--m', '-mod', '--mod', '-module', '--module',
        '-modules', '--modules',
    )
    more_modules_opts = ('-mm', '--mm', '-more', '--more')
    pipe_opts = ('-p', '--p', '-pipe', '--pipe')
    trace_opts = (
        '-t', '--t', '-trace', '--trace', '-traceback', '--traceback',
    )

    args = argv[1:]
    # only import the web-related names when (likely) needed
    if any(seemsurl(e) for e in args):
        from io import TextIOWrapper
        from urllib.request import urlopen

    no_input = False
    pipe_mode = False
    trace_errors = False

    # handle all leading options
    while len(args) > 0:
        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            # all remaining arguments are the steps of the pipeline
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    # a throwaway local name avoids clobbering global `msg`,
                    # which is an alias for func message
                    what = 'a module name or a comma-separated list of modules'
                    raise Exception('expected ' + what)

                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)

                g = None
                import_module = None
                args = args[2:]
            except Exception as e:
                fail(e, 1)

            continue

        if args[0] in more_modules_opts:
            import functools
            import itertools
            import json
            import math
            import random
            import statistics
            import string
            import time
            args = args[1:]
            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        break


    try:
        if pipe_mode:
            if no_input:
                raise Exception('can\'t use pipe-mode when input is disabled')
            exprs = [compile_py(e, e, mode='eval') for e in args]
            compile_py = None
            handle_pipe(stdin, exprs)
            exit(0)

        expr = '.'
        if len(args) > 0:
            expr = args[0]
            args = args[1:]

        if expr == '.' and no_input:
            print(info.strip(), file=stderr)
            exit(0)

        # a single dot stands for the current line, unchanged
        if expr == '.':
            expr = 'line'

        expr = compile_py(expr, expr, mode='eval')
        compile_py = None

        if no_input:
            handle_no_input(expr)
            exit(0)

        if len(args) == 0:
            handle_lines(stdin, expr)
            exit(0)

        got_stdin = False
        all_stdin = None
        dashes = args.count('-')

        for path in args:
            if path == '-':
                if dashes > 1:
                    # stdin is read only once, remembering all its lines
                    # for later reuse
                    if not got_stdin:
                        all_stdin = []
                        handle_lines(hold_lines(stdin, all_stdin), expr)
                        got_stdin = True
                    else:
                        handle_lines(all_stdin, expr)
                else:
                    handle_lines(stdin, expr)
                continue

            if seemsurl(path):
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        handle_lines(txt, expr)
                continue

            with open_utf8(path) as txt:
                handle_lines(txt, expr)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        if trace_errors:
            raise
        else:
            fail(e, 1)