#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


info = '''
tlp [options...] [python expression] [files/URIs...]


Transform Lines with Python runs a python expression on each line of text
input, encoded as UTF-8. Carriage-returns are always ignored in lines, as
well as any UTF-8-BOM on the first line of each input.

The expression can use either `l` or `line` for the current line, and `i` as
a 0-based line counter, which keeps growing, even across input-sources, when
given more than one.

Input-sources can be either files or web-URIs. When not given any explicit
named sources, the standard input is used. It's even possible to reuse the
standard input using multiple single dashes (-) in the order needed: stdin
is only read once in this case, and kept for later reuse.

When the expression results in None, the current input line is ignored. When
the expression results in a boolean, this determines whether the line is
emitted to the standard output, or ignored.

When the expression emits lists, tuples, or generators, each item is emitted
as its own line/result. Since empty containers emit no lines, these are the
most general type of results, acting as either filters, or input-amplifiers.


Examples

# numbers from 0 to 5, each on its own output line; no input is read/used
tlp = 'range(6)'

# all powers up to the 4th, using each input line auto-parsed into a `float`
tlp = 'range(1, 6)' | tlp '(float(l)**p for p in range(1, 4+1))'

# separate input lines with an empty line between each; global var `empty`
# can be used to avoid bothering with nested shell-quoting
tlp = 'range(6)' | tlp '["", l] if i > 0 else l'

# ignore errors/exceptions, in favor of the original lines/values
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), line)'

# ignore errors/exceptions, calling a fallback func with the exception
tlp = '("abc", "123")' | tlp 'rescue(lambda: 2 * float(line), str)'

# filtering lines out via None values
head -c 1024 /dev/urandom | strings | tlp 'l if len(l) < 20 else None'

# boolean-valued results are concise ways to filter lines out
head -c 1024 /dev/urandom | strings | tlp 'len(l) < 20'

# function/callable results are automatically called on the current line
head -c 1024 /dev/urandom | strings | tlp len
'''


from itertools import islice
from json import dumps, loads
# keep the builtin reachable, since `compile` is redefined further down as
# a regex-caching helper for expressions
compile_py = compile
from re import compile as compile_uncached, IGNORECASE
from sys import argv, exit, stderr, stdin
from time import sleep
from typing import Generator, Iterable


# handle the help-only cases early, before doing anything else; this only
# happens when the file is run as a script
if __name__ == '__main__':
    if len(argv) < 2:
        print(info.strip(), file=stderr)
        exit(0)
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        exit(0)


# i is the 0-based line counter available to expressions: it's only ever
# incremented, so it keeps growing across all input-sources
i = 0


def _strip_line(raw, first):
    'Remove trailing CR/LF bytes, and a leading UTF-8 BOM on first lines.'

    text = raw.rstrip('\r\n')
    if not first:
        return text

    # a correctly-decoded BOM is a single \ufeff; the 3-char variant is what
    # the same bytes look like when the input was decoded as latin-1 instead
    for bom in ('\ufeff', '\xef\xbb\xbf'):
        if text.startswith(bom):
            return text[len(bom):]
    return text


def _emit(res, fallback):
    '''
    Show an expression's result on stdout: sequence-like results emit each
    of their items as its own line, except for Skip values; any other result
    goes through func adapt_result, and is only shown when that isn't None.
    '''

    if isinstance(res, (list, range, tuple, Generator)):
        for item in res:
            if not isinstance(item, Skip):
                print(item, flush=True)
        return

    res = adapt_result(res, fallback)
    if res is not None:
        print(res, flush=True)


def handle_no_input(expr):
    'Evaluate the compiled expression once, without reading any input.'

    _emit(eval(expr), None)


def handle_lines(src, expr):
    '''
    Evaluate the compiled expression for each line from the iterable given,
    showing results as it goes. Expressions can use globals `l` and `line`
    (current line), `i` (line counter), and `v`, `val`, and `value` (current
    line parsed as JSON, or a Skip value when that fails).
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value

    first = True
    for e in src:
        l = line = _strip_line(e, first)
        first = False

        try:
            v = val = value = loads(l)
        except Exception:
            # non-JSON lines are expected, and aren't errors
            v = val = value = Skip()

        res = eval(expr)
        i += 1
        _emit(res, line)


def handle_pipe(src, expressions):
    '''
    Evaluate a chain of compiled expressions for each line from the iterable
    given: each step can use `p` for the previous step's result, and `o` for
    the original line; callable results are called on the previous result.
    Only the last step's result is shown.
    '''

    # `comprehension` expressions seem to ignore local variables: even
    # lambda-based workarounds fail
    global i, l, line, v, val, value
    # variable names `o` and `p` work like in the `pyp` tool, except
    # the pipeline steps were given as separate cmd-line arguments
    global o, p

    first = True
    for e in src:
        l = line = _strip_line(e, first)
        first = False
        o = p = prev = line

        try:
            v = val = value = loads(l)
        except Exception:
            # non-JSON lines are expected, and aren't errors
            v = val = value = Skip()

        for expr in expressions:
            p = eval(expr)
            if callable(p):
                p = p(prev)
            prev = p

        i += 1
        _emit(p, line)


def hold_lines(src, lines):
    'Yield all items from an iterable, also appending them to the list given.'

    for e in src:
        lines.append(e)
        yield e


def adapt_result(res, fallback):
    '''
    Turn a non-sequence result into what to show for it, or None to show
    nothing: None, False, and Skip values show nothing, callables are called
    with the fallback given, True stands for the fallback itself, dictionaries
    become JSON, and anything else is turned into a string.
    '''

    if res is None or res is False or isinstance(res, Skip):
        return None
    if callable(res):
        return res(fallback)
    if res is True:
        return fallback
    if isinstance(res, dict):
        return dumps(res, allow_nan=False)
    return str(res)


class Skip:
    'Marker type for values which must never be emitted as output.'


skip = Skip()


def chunk(items, chunk_size):
    'Break iterable into chunks, each with up to the item-count given.'

    # check the chunk-size before anything else: a non-positive size would
    # otherwise never let the string-specific loop make any progress
    if not isinstance(chunk_size, int):
        raise Exception('non-integer chunk-size')
    if chunk_size < 1:
        raise Exception('non-positive chunk-size')

    if isinstance(items, str):
        # strings are chunked into (sub)strings, instead of char-tuples
        for start in range(0, len(items), chunk_size):
            yield items[start:start + chunk_size]
        return

    it = iter(items)
    while True:
        head = tuple(islice(it, chunk_size))
        if not head:
            return
        yield head

chunked = chunk


# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# ire_cache is like re_cache, except it's for case-insensitive regexes
ire_cache = {}


def compile(expr, flags = 0):
    'Speed-up using regexes across lines, by avoiding recompilations.'

    if flags != 0 and flags != IGNORECASE:
        msg = 'only the default and case-insensitive options are supported'
        raise Exception(msg)

    cache = re_cache if flags == 0 else ire_cache
    if expr in cache:
        return cache[expr]

    pat = compile_uncached(expr, flags)
    cache[expr] = pat
    return pat


def icompile(expr):
    'Case-insensitive counterpart of func compile.'

    return compile(expr, IGNORECASE)


def cond(*args):
    '''
    Multi-way conditional: arguments are condition/result pairs, optionally
    followed by a final fallback value. The result paired to the first truish
    condition is returned; otherwise the fallback, or None when there's none.
    '''

    if len(args) == 0:
        return None

    for i, e in enumerate(args):
        if i % 2 == 0 and i < len(args) - 1 and e:
            return args[i + 1]

    return args[-1] if len(args) % 2 == 1 else None


def dive(into, using):
    'Depth-first recursive caller for 1-input functions.'

    # allow giving the function first
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {k: rec(x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(x) for x in v]
        return using(v)

    return rec(into)


def divekeys(into, using):
    'Depth-first recursive caller for 1-input funcs which rename dict keys.'

    # allow giving the function first
    if callable(into):
        into, using = using, into

    def rec(v):
        if isinstance(v, dict):
            return {using(k): rec(x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(x) for x in v]
        # leaf values are kept as they are: only keys change
        return v

    return rec(into)


def divekv(into, using, using2 = None):
    '''
    Depth-first recursive caller for 2-input functions, which are given
    key-value pairs (or index-value pairs for items in sequences). When
    given 2 functions, the first one renames dictionary keys, while the
    second one transforms the leaf values.
    '''

    # allow giving the function(s) first
    if using2 is None:
        if callable(into):
            into, using = using, into
    else:
        if not callable(using2):
            into, using, using2 = using2, into, using

    def rec(k, v):
        if isinstance(v, dict):
            return {k: rec(k, x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec(n, x) for n, x in enumerate(v)]
        return using(k, v)

    def rec2(k, v):
        if isinstance(v, dict):
            return {str(using(k, x)): rec2(k, x) for k, x in v.items()}
        if isinstance(v, Iterable) and not isinstance(v, str):
            return [rec2(n, x) for n, x in enumerate(v)]
        return using2(k, v)

    return rec(None, into) if using2 is None else rec2(None, into)

kvdive = divekv


def drop(src, *what):
    '''
    Remove things from a value: substrings from strings, keys from
    dictionaries, or keys from each dictionary in an iterable. Any other
    type of value results in None.
    '''

    if isinstance(src, str):
        for s in what:
            src = src.replace(s, '')
        return src

    def kdrop(src, what):
        return {k: v for k, v in src.items() if k not in what}

    if isinstance(src, dict):
        return kdrop(src, set(what))

    if isinstance(src, Iterable):
        what = set(what)
        return [kdrop(e, what) for e in src]

    return None

dropped = drop


def join(x, y = ' '):
    'Join values into a string, or make a dict from keys and values.'

    if isinstance(x, str):
        return x.join(str(v) for v in y)
    if isinstance(y, str):
        return y.join(str(v) for v in x)
    return {k: v for k, v in zip(x, y)}


def pick(src, *keys):
    '''
    Keep only the keys given from a dictionary, or from each dictionary in
    an iterable: missing keys are given None values.
    '''

    if isinstance(src, dict):
        return {k: src.get(k, None) for k in keys}
    return [{k: e.get(k, None) for k in keys} for e in src]


def rescue(attempt, fallback = None):
    '''
    Call the 0-input function given, returning its result: when that fails
    with an exception, the fallback is returned instead, or the result of
    calling it with the exception, when the fallback is itself callable.
    '''

    try:
        return attempt()
    except Exception as e:
        if callable(fallback):
            return fallback(e)
        return fallback

catch = rescue
catched = rescue
caught = rescue
recover = rescue
recovered = rescue
rescued = rescue


def retype(x):
    'Try to narrow the type of the value given.'

    if isinstance(x, float):
        # is_integer is false for infinities and NaNs, which can't be
        # turned into integers without raising exceptions
        return int(x) if x.is_integer() else x

    if not isinstance(x, str):
        return x

    # try parsers from the most general to the most specific, keeping the
    # original string when none of them succeed
    for parse in (loads, int, float):
        try:
            return parse(x)
        except Exception:
            pass

    return x

autocast = retype
autocasted = retype
mold = retype
molded = retype
recast = retype
recasted = retype
remold = retype
remolded = retype
retyped = retype


def json0(x):
    'Turn a value into the most compact single-line JSON string.'

    return dumps(x, separators=(',', ':'), allow_nan=False, indent=None)

j0 = json0


def jsonl(x):
    '''
    Generate JSON Lines: non-string iterables emit a single-line JSON string
    for each of their items, ignoring Skip values; JSON-like scalars, strings,
    and dictionaries emit 1 line; Skip values emit nothing. Any other value
    is emitted as the JSON string of its string form.
    '''

    if isinstance(x, Skip):
        return

    def emit(x):
        sep = (', ', ': ')
        return dumps(x, separators=sep, allow_nan=False, indent=None)

    if x is None or isinstance(x, (bool, int, float, dict, str)):
        yield emit(x)
        return

    if isinstance(x, Iterable):
        for e in x:
            if isinstance(e, Skip):
                continue
            # each item gets its own line, instead of the whole container
            yield emit(e)
        return

    yield emit(str(x))

jl = jsonl
jsonlines = jsonl
ndjson = jsonl


def typeof(x):
    'Name the type of a value, using JSON/JavaScript-style type names.'

    # return str(type(x))
    return {
        type(None): 'null',
        bool: 'boolean',
        dict: 'object',
        float: 'number',
        int: 'number',
        str: 'string',
        list: 'array',
        tuple: 'array',
    }.get(type(x), 'other')

jstype = typeof


def wait(seconds, result):
    'Wait the given number of seconds, before returning its latter arg.'

    # allow giving the arguments in either order, when unambiguous
    t = (int, float)
    if (not isinstance(seconds, t)) and isinstance(result, t):
        seconds, result = result, seconds
    sleep(seconds)
    return result

delay = wait


def after(x, what):
    'Get what follows the first match in a string, or an empty string.'
    i = x.find(what)
    return '' if i < 0 else x[i+len(what):]

def afterlast(x, what):
    'Get what follows the last match in a string, or an empty string.'
    i = x.rfind(what)
    return '' if i < 0 else x[i+len(what):]

afterfinal = afterlast

def before(x, what):
    'Get what precedes the first match in a string, or the whole string.'
    i = x.find(what)
    return x if i < 0 else x[:i]

def beforelast(x, what):
    'Get what precedes the last match in a string, or the whole string.'
    i = x.rfind(what)
    return x if i < 0 else x[:i]

beforefinal = beforelast

def since(x, what):
    'Get a string starting from its first match, or an empty string.'
    i = x.find(what)
    return '' if i < 0 else x[i:]

def sincelast(x, what):
    'Get a string starting from its last match, or an empty string.'
    i = x.rfind(what)
    return '' if i < 0 else x[i:]

sincefinal = sincelast

def until(x, what):
    'Get a string up to the end of its first match, or the whole string.'
    i = x.find(what)
    return x if i < 0 else x[:i+len(what)]

def untilfinal(x, what):
    'Get a string up to the end of its last match, or the whole string.'
    i = x.rfind(what)
    return x if i < 0 else x[:i+len(what)]

untillast = untilfinal


# ANSI-styling funcs: all `...back` funcs use sequence 48;2;r;g;b to set the
# background color, along with a light foreground color; the others use
# sequence 38;2;r;g;b to set the foreground color; all styles are reset at
# the end of the string given

def blue(s):
    'Style text with a blue foreground.'
    return f'\x1b[38;2;0;95;215m{s}\x1b[0m'

def blueback(s):
    'Style text with a blue background.'
    return f'\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m{s}\x1b[0m'

bluebg = blueback

def bold(s):
    'Style text as bold.'
    return f'\x1b[1m{s}\x1b[0m'

bolded = bold

def gbm(s, good = False, bad = False, meh = False):
    '''
    Good, Bad, Meh ANSI-styles a plain string via ANSI-style sequences,
    according to 1..3 conditions given as boolean(ish) values: these are
    checked in order, so the first truish one wins.
    '''

    if good:
        return green(s)
    if bad:
        return red(s)
    if meh:
        return gray(s)
    return s

def gray(s):
    'Style text with a gray foreground.'
    return f'\x1b[38;2;168;168;168m{s}\x1b[0m'

def grayback(s):
    'Style text with a gray background.'
    return f'\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m{s}\x1b[0m'

graybg = grayback

def green(s):
    'Style text with a green foreground.'
    return f'\x1b[38;2;0;135;95m{s}\x1b[0m'

def greenback(s):
    'Style text with a green background.'
    return f'\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m{s}\x1b[0m'

greenbg = greenback

def highlight(s):
    'Style text using reversed/swapped colors.'
    return f'\x1b[7m{s}\x1b[0m'

hilite = highlight

def magenta(s):
    'Style text with a magenta foreground.'
    return f'\x1b[38;2;215;0;255m{s}\x1b[0m'

def magentaback(s):
    'Style text with a magenta background.'
    return f'\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

magback = magentaback
magbg = magentaback
magentabg = magentaback

def orange(s):
    'Style text with an orange foreground.'
    return f'\x1b[38;2;215;95;0m{s}\x1b[0m'

def orangeback(s):
    'Style text with an orange background.'
    return f'\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

orangebg = orangeback
orback = orangeback
orbg = orangeback

def purple(s):
    'Style text with a purple foreground.'
    return f'\x1b[38;2;135;95;255m{s}\x1b[0m'

def purpleback(s):
    'Style text with a purple background.'
    return f'\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m{s}\x1b[0m'

purback = purpleback
purbg = purpleback
purplebg = purpleback

def red(s):
    'Style text with a red foreground.'
    return f'\x1b[38;2;204;0;0m{s}\x1b[0m'

def redback(s):
    'Style text with a red background.'
    # 48;2 sets the background: using 38;2 twice would only set foregrounds
    return f'\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m{s}\x1b[0m'

redbg = redback

def underline(s):
    'Style text as underlined.'
    return f'\x1b[4m{s}\x1b[0m'

underlined = underline



def fail(msg, code = 1):
    'Show an error message in red on stderr, then quit with the code given.'

    print(f'\x1b[31m{str(msg)}\x1b[0m', file=stderr)
    exit(code)


def make_open_utf8(open):
    'Make a read-only UTF-8 file-opener out of the open func given.'

    def open_utf8_readonly(path):
        return open(path, encoding='utf-8')
    return open_utf8_readonly


def message(msg, result = None):
    'Show a message on stderr, then return the (optional) result given.'

    print(msg, file=stderr)
    return result

msg = message


def seemsurl(path):
    'Check whether a string starts with any of the supported URI protocols.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return path.startswith(protocols)


# handy constants for expressions, which help avoid nested shell-quoting
cr = '\r'
crlf = '\r\n'
dquo = '"'
dquote = '"'
empty = ''
lcurly = '{'
lf = '\n'
rcurly = '}'
s = ''
squo = '\''
squote = '\''
# utf8bom = '\xef\xbb\xbf'

nil = None
none = None
null = None


# keep expressions from running arbitrary statements, and from opening
# files for anything other than reading UTF-8 text
exec = None
open_utf8 = make_open_utf8(open)
open = open_utf8

no_input_opts = (
    '=', '--n', '-nil', '--nil', '-none', '--none', '-null', '--null',
)
modules_opts = (
    '-m', '--m', '-mod', '--mod', '-module', '--module',
    '-modules', '--modules',
)
more_modules_opts = ('-mm', '--mm', '-more', '--more')
pipe_opts = ('-p', '--p', '-pipe', '--pipe')
trace_opts = ('-t', '--t', '-trace', '--trace', '-traceback', '--traceback')


# the script's main logic stays at the module level, instead of being in a
# func, so that expressions and explicitly-imported modules share the same
# global namespace
if __name__ == '__main__':
    args = argv[1:]
    if any(seemsurl(e) for e in args):
        from io import TextIOWrapper
        from urllib.request import urlopen

    no_input = False
    pipe_mode = False
    trace_errors = False

    # handle all leading options
    while len(args) > 0:
        if args[0] in no_input_opts:
            no_input = True
            args = args[1:]
            continue

        if args[0] in pipe_opts:
            # all args after the pipe option are expressions
            pipe_mode = True
            args = args[1:]
            break

        if args[0] in modules_opts:
            try:
                if len(args) < 2:
                    msg = 'a module name or a comma-separated list of modules'
                    raise Exception('expected ' + msg)

                g = globals()
                from importlib import import_module
                for e in args[1].split(','):
                    g[e] = import_module(e)

                g = None
                import_module = None
                args = args[2:]
            except Exception as e:
                fail(e, 1)

            continue

        if args[0] in more_modules_opts:
            import functools, itertools, json, math, random, statistics, string, time
            args = args[1:]
            continue

        if args[0] in trace_opts:
            trace_errors = True
            args = args[1:]
            continue

        break


    try:
        if pipe_mode:
            if no_input:
                raise Exception('can\'t use pipe-mode when input is disabled')
            exprs = [compile_py(e, e, mode='eval') for e in args]
            compile_py = None
            handle_pipe(stdin, exprs)
            exit(0)

        expr = '.'
        if len(args) > 0:
            expr = args[0]
            args = args[1:]

        if expr == '.' and no_input:
            print(info.strip(), file=stderr)
            exit(0)

        # a single dot stands for each line as is
        if expr == '.':
            expr = 'line'

        expr = compile_py(expr, expr, mode='eval')
        compile_py = None

        if no_input:
            handle_no_input(expr)
            exit(0)

        if len(args) == 0:
            handle_lines(stdin, expr)
            exit(0)

        got_stdin = False
        all_stdin = None
        dashes = args.count('-')

        for path in args:
            if path == '-':
                if dashes > 1:
                    # stdin is used multiple times: remember all its lines
                    # the first time around, then reuse them later
                    if not got_stdin:
                        all_stdin = []
                        handle_lines(hold_lines(stdin, all_stdin), expr)
                        got_stdin = True
                    else:
                        handle_lines(all_stdin, expr)
                else:
                    handle_lines(stdin, expr)
                continue

            if seemsurl(path):
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        handle_lines(txt, expr)
                continue

            with open_utf8(path) as txt:
                handle_lines(txt, expr)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        exit(0)
    except KeyboardInterrupt:
        # stderr.close()
        exit(2)
    except Exception as e:
        if trace_errors:
            raise
        else:
            fail(e, 1)