#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# tl [options...] [python expressions...]
#
# Transform Lines runs Python expressions on each line from standard input:
# each expression given emits its result as its own line, so input lines are
# `amplified` when using multiple formulas, so to speak.
#
# When a formula's result is None/null, it emits no output line.
#
# Each input line is available to the expression as variables named `l`,
# `line`, `s`, `v`, `value`, `d`, and `data`.
#
# Each input line is also parsed into a floating-point number named `f`,
# which is NaN when the line can't parse into a number.
#
# Options, where leading double-dashes are also allowed:
#
#     -h          show this help message
#     -help       same as -h
#
#     -jsonl      transform JSON Lines into proper JSON, without formulas
#
#     -n          No input / load Nothing
#     -nil        same as -n
#     -none       same as -n
#     -null       same as -n


# Every name imported below is deliberately placed in the module namespace:
# the user-given formulas are evaluated against it, so these imports are the
# tool's `standard library`. Names which only exist in newer python versions,
# or only on some platforms, are imported on a best-effort basis.

from base64 import \
    standard_b64encode as base64bytes, standard_b64decode as debase64bytes

from datetime import \
    MAXYEAR, MINYEAR, date, datetime, time, timedelta, timezone, tzinfo
try:
    from datetime import UTC
except ImportError:
    pass

from functools import \
    cache, cached_property, cmp_to_key, lru_cache, \
    partial, partialmethod, reduce, \
    singledispatch, singledispatchmethod, total_ordering, update_wrapper, \
    wraps
# these used to be picked up as undocumented re-exports of module functools:
# importing them from the modules which define them keeps them available
from abc import get_cache_token
from collections import namedtuple
from reprlib import recursive_repr

from itertools import \
    accumulate, chain, combinations, combinations_with_replacement, \
    compress, count, cycle, dropwhile, filterfalse, groupby, islice, \
    permutations, product, repeat, starmap, takewhile, tee, zip_longest
try:
    from itertools import pairwise
except ImportError:
    pass

from json import dump, dumps, loads

import math
from math import \
    acos, acosh, asin, asinh, atan, atan2, atanh, ceil, comb, \
    copysign, cos, cosh, degrees, dist, e, erf, erfc, exp, expm1, \
    fabs, factorial, floor, fmod, frexp, fsum, gamma, gcd, hypot, inf, \
    isclose, isfinite, isinf, isnan, isqrt, lcm, ldexp, lgamma, log, \
    log10, log1p, log2, modf, nan, nextafter, perm, pi, pow, prod, \
    radians, remainder, sin, sinh, sqrt, tan, tanh, tau, trunc, ulp
try:
    from math import cbrt, exp2
except ImportError:
    pass

from random import \
    betavariate, choice, choices, expovariate, gammavariate, gauss, \
    getrandbits, getstate, lognormvariate, normalvariate, paretovariate, \
    randbytes, randint, random, randrange, sample, seed, setstate, \
    shuffle, triangular, uniform, vonmisesvariate, weibullvariate

compile_py = compile  # keep built-in func compile for later
from re import compile as compile_uncached, Pattern

# bisect_left, bisect_right, and mul aren't part of the documented API of
# module statistics, so they're imported from the modules which define them
from bisect import bisect_left, bisect_right
from operator import mul
from statistics import \
    fmean, \
    geometric_mean, harmonic_mean, mean, median, \
    median_grouped, median_high, median_low, mode, multimode, pstdev, \
    pvariance, quantiles, stdev, variance
try:
    # only the documented names are optional here: bundling them with
    # private re-exports would make all of them disappear together
    from statistics import correlation, covariance, linear_regression
except ImportError:
    pass

from string import \
    Formatter, Template, ascii_letters, ascii_lowercase, ascii_uppercase, \
    capwords, digits, hexdigits, octdigits, printable, punctuation, \
    whitespace

from sys import argv, stdin, stdout

from textwrap import dedent, fill, indent, shorten, wrap

# NOTE(review): names `time` and `timezone` from module time rebind the
# classes of the same names imported from module datetime above, so formulas
# only see the ones from module time; kept as is, to avoid changing what
# existing formulas get
from time import \
    altzone, asctime, \
    ctime, daylight, get_clock_info, \
    gmtime, localtime, mktime, monotonic, monotonic_ns, perf_counter, \
    perf_counter_ns, process_time, process_time_ns, \
    sleep, strftime, strptime, struct_time, thread_time, thread_time_ns, \
    time, time_ns, timezone, tzname
try:
    from time import \
        clock_getres, clock_gettime, clock_gettime_ns, clock_settime, \
        clock_settime_ns, pthread_getcpuclockid, tzset
except ImportError:
    pass

# some defined funcs exposed to formulas use type declarations
from typing import Any, Iterable, List

from unicodedata import \
    bidirectional, category, combining, decimal, decomposition, digit, \
    east_asian_width, is_normalized, lookup, mirrored, name, normalize, \
    numeric


# info is the message shown when the script isn't given any argument, or
# when the leading argument is one of the standard cmd-line help options
info = '''
tl [options...] [python expressions...]

Transform Lines runs Python expressions on each line from standard input:
each expression given emits its result as its own line, so input lines are
`amplified` when using multiple formulas, so to speak.

When a formula's result is None/null, it emits no output line.

Each input line is available to the expression as variables named `l`,
`line`, `s`, `v`, `value`, `d`, and `data`.

Each input line is also parsed into a floating-point number named `f`,
which is NaN when the line can't parse into a number.

Options, where leading double-dashes are also allowed:

    -h          show this help message
    -help       same as -h

    -jsonl      transform JSON Lines into proper JSON, without formulas

    -n          No input / load Nothing
    -nil        same as -n
    -none       same as -n
    -null       same as -n
'''.strip()


# no args or a leading help-option arg means show the help message and quit
if len(argv) < 2 or argv[1].lower() in ('-h', '--h', '-help', '--help'):
    from sys import exit, stderr
    print(info, file=stderr)
    exit(0)


# re_cache is used by custom func compile to cache previously-compiled
# regular-expressions, which makes them quicker to (re)use in formulas
re_cache = {}

# ansi_style_re detects the most commonly-used ANSI-style sequences, and
# is used in func plain; it's a raw string, since `\[` isn't a valid escape
# in normal string literals
ansi_style_re = compile_uncached(r'\x1b\[([0-9;]+m|[0-9]*[A-HJKST])')

# paddable_tab_re detects single tabs and possible runs of spaces around
# them, and is used in func squeeze
paddable_tab_re = compile_uncached(' *\t *')

# spaces_re detects runs of 2 or more spaces, and is used in func squeeze
spaces_re = compile_uncached(' {2,}')


# some convenience aliases to commonly-used values
true = True
false = False
nil = None
none = None
null = None
block = '█'
cdot = '·'
colon = ':'
comma = ','
crlf = '\r\n'
dot = '.'
empty = ''
lf = '\n'
mdot = '·'
semicolon = ';'
space = ' '
tab = '\t'
# the byte-order mark as it shows up in decoded text, and which encodes
# into the 3 bytes EF BB BF when written out as UTF-8
utf8bom = '\ufeff'

# some occasionally-useful values
kb = 1024
mb = 1024 * kb
gb = 1024 * mb
tb = 1024 * gb
pb = 1024 * tb

months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

monweek = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
    'Saturday', 'Sunday',
]

sunweek = [
    'Sunday',
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
]

# some convenience aliases to various funcs from the python stdlib
geomean = geometric_mean
harmean = harmonic_mean
sd = stdev
popsd = pstdev
var = variance
popvar = pvariance
randbeta = betavariate
randexp = expovariate
randgamma = gammavariate
randlognorm = lognormvariate
randnorm = normalvariate
randweibull = weibullvariate


def jsonl2json(w, src) -> None:
    '''
    Turn JSON Lines read from the source given into proper JSON.

    Each non-blank line from `src` is parsed as a JSON value, and all
    values are written to `w` as the items of a single JSON array, one
    item per output line. Blank lines are ignored, and a leading
    byte-order mark is tolerated. Lines which aren't valid JSON raise
    the ValueError coming from func loads.
    '''

    count = 0
    w.write('[')
    for line in src:
        line = line.lstrip('\ufeff').strip()
        if not line:
            continue

        w.write(',\n ' if count > 0 else '\n ')
        # parse each line, so it's emitted as the value it stands for,
        # instead of as a quoted string with the JSON text in it
        dump(loads(line), w)
        count += 1

    w.write('\n]\n' if count > 0 else ']\n')


def run(expressions: List[str], line: str) -> None:
    '''
    Run all expressions given on the input line given, showing results.

    Expressions can be source strings or code objects compiled in `eval`
    mode. None results show nothing, dictionaries show a tab-separated
    key/value pair per line, other non-string iterables show an item per
    line, and anything else is shown as a single line. Each expression
    can use the previous expression's result via variable `prev`.
    '''

    try:
        number = float(line)
    except (TypeError, ValueError):
        number = nan

    # all names formulas can use live in a single namespace used as the
    # globals for func eval: lambdas and generator expressions only look
    # names up in globals, so the current-line aliases must be there too
    env = dict(globals())
    env.update(
        # prevent expressions from running statements and opening files
        exec=None, open=None, stdin=None, stdout=None,
        # give the formulas various aliases for the current-line value
        d=line, data=line, v=line, value=line, l=line, s=line, line=line,
        f=number, prev=None,
    )

    for expr in expressions:
        # NOTE: expressions are arbitrary code the user typed on the
        # command line; never feed this func expressions from anyone else
        result = eval(expr, env)
        env['prev'] = result
        if result is None:
            continue

        # dictionaries are iterable too, so they must be checked first
        if isinstance(result, dict):
            for key, val in result.items():
                print(f'{key}\t{val}')
        elif isinstance(result, Iterable) and not isinstance(result, str):
            for item in result:
                print(item)
        else:
            print(result)


def dedup(v: Iterable) -> List:
    '''
    Ignore reappearing items from iterables: result is always a list,
    with items in order of first appearance. Unhashable items are also
    supported, using slower linear lookups.
    '''

    seen = set()
    seen_unhashable = []
    result = []
    for item in v:
        try:
            if item in seen:
                continue
            seen.add(item)
        except TypeError:
            # unhashable items, such as lists, can't be looked up in sets
            if item in seen_unhashable:
                continue
            seen_unhashable.append(item)
        result.append(item)
    return result


unique = dedup


def fix(x: Any, repl: Any = None) -> Any:
    '''
    Make values JSON-compatible: NaNs and infinities turn into the
    replacement value given (null by default), even inside lists and
    dictionaries, while values of other unsupported types turn into
    their string representations.
    '''

    if x is None or isinstance(x, (bool, int, str)):
        return x
    if isinstance(x, float):
        return repl if (isnan(x) or isinf(x)) else x
    if isinstance(x, list):
        return [fix(e, repl) for e in x]
    if isinstance(x, dict):
        return {k: fix(e, repl) for k, e in x.items()}
    return str(x)


def after(s: str, *what: str) -> str:
    '''
    Keep what follows the first match of each string given, in order:
    a string which can't be found leaves an empty result.
    '''

    for t in what:
        i = s.find(t)
        s = '' if i < 0 else s[i + len(t):]
    return s


def after_last(s: str, *what: str) -> str:
    '''
    Keep what follows the last match of each string given, in order:
    a string which can't be found leaves an empty result.
    '''

    for t in what:
        i = s.rfind(t)
        s = '' if i < 0 else s[i + len(t):]
    return s


afterlast = after_last


def before(s: str, *what: str) -> str:
    '''
    Keep what precedes the first match of each string given, in order:
    a string which can't be found changes nothing.
    '''

    for t in what:
        i = s.find(t)
        s = s if i < 0 else s[:i]
    return s


def before_last(s: str, *what: str) -> str:
    '''
    Keep what precedes the last match of each string given, in order:
    a string which can't be found changes nothing.
    '''

    for t in what:
        i = s.rfind(t)
        s = s if i < 0 else s[:i]
    return s


beforelast = before_last


def since(s: str, *what: str) -> str:
    '''
    Keep the first match of each string given, and all which follows it:
    a string which can't be found leaves an empty result.
    '''

    for t in what:
        i = s.find(t)
        s = '' if i < 0 else s[i:]
    return s


def since_last(s: str, *what: str) -> str:
    '''
    Keep the last match of each string given, and all which follows it:
    a string which can't be found leaves an empty result.
    '''

    for t in what:
        i = s.rfind(t)
        s = '' if i < 0 else s[i:]
    return s


sincelast = since_last


def until(s: str, *what: str) -> str:
    '''
    Keep all up to the first match of each string given, match included:
    a string which can't be found changes nothing.
    '''

    for t in what:
        i = s.find(t)
        s = s if i < 0 else s[:i + len(t)]
    return s


def skip_empty(s: str) -> Any:
    '''Turn empty strings into None, which makes formulas emit nothing'''
    return s if s != '' else None


skipempty = skip_empty


def now() -> datetime:
    '''Get the current local date/time'''
    return datetime.now()


def gsub(s: str, what: str, repl: str) -> str:
    '''Replace all regex-matches with the string given'''
    return compile(what).sub(repl, s)


def base64(x):
    '''Base64-encode the UTF-8 bytes of the string form of the value given'''
    return base64bytes(str(x).encode()).decode()


def debase64(x):
    '''Decode base64 text, expecting the decoded bytes to be UTF-8 text'''
    return debase64bytes(str(x).encode()).decode()


def compile(s: str) -> Pattern:
    '''Cached regex `compiler`, so it's quicker to (re)use in formulas'''

    cached = re_cache.get(s)
    if cached is None:
        cached = compile_uncached(s)
        re_cache[s] = cached
    return cached


def squeeze(s: str) -> str:
    '''
    A more aggressive way to rid strings of extra spaces which,
    unlike string method strip, also squeezes inner runs of
    multiple spaces into single spaces
    '''

    s = s.strip()
    s = spaces_re.sub(' ', s)
    s = paddable_tab_re.sub('\t', s)
    return s


def float_or(s: str, default: Any = nan) -> Any:
    '''Parse a value as a float, using the fallback given on failure'''

    try:
        return float(s)
    except (TypeError, ValueError, OverflowError):
        return default


floator = float_or


def plain(s: str) -> str:
    '''Ignore all ANSI-style sequences func ansi_style_re can detect'''
    return ansi_style_re.sub('', s)


def blue(s: Any) -> str:
    '''Style the value given using ANSI codes: blue foreground'''
    return f'\x1b[38;5;26m{s}\x1b[0m'


def bold(s: Any) -> str:
    '''Style the value given using ANSI codes: bold'''
    return f'\x1b[1m{s}\x1b[0m'


def gray(s: Any) -> str:
    '''Style the value given using ANSI codes: gray foreground'''
    return f'\x1b[38;5;249m{s}\x1b[0m'


def green(s: Any) -> str:
    '''Style the value given using ANSI codes: green foreground'''
    return f'\x1b[38;5;29m{s}\x1b[0m'


def highlight(s: Any) -> str:
    '''Style the value given using ANSI codes: reversed colors'''
    return f'\x1b[7m{s}\x1b[0m'


hilite = highlight


def magenta(s: Any) -> str:
    '''Style the value given using ANSI codes: magenta foreground'''
    return f'\x1b[38;5;165m{s}\x1b[0m'


def orange(s: Any) -> str:
    '''Style the value given using ANSI codes: orange foreground'''
    return f'\x1b[38;5;166m{s}\x1b[0m'


def purple(s: Any) -> str:
    '''Style the value given using ANSI codes: purple foreground'''
    return f'\x1b[38;5;99m{s}\x1b[0m'


def red(s: Any) -> str:
    '''Style the value given using ANSI codes: red foreground'''
    return f'\x1b[38;5;1m{s}\x1b[0m'


def underline(s: Any) -> str:
    '''Style the value given using ANSI codes: underlined'''
    return f'\x1b[4m{s}\x1b[0m'


#
# args is the `proper` list of arguments given to the script
args = argv[1:]
use_input = True
jsonl = False
expressions = args

# bind func exit explicitly, since the built-in of the same name is only
# injected by module site, which isn't always loaded
from sys import exit

if not args:
    # show help message when given no arguments
    from sys import stderr
    print(info, file=stderr)
    exit(0)
else:
    # handle all other leading options; the explicit help options are
    # handled earlier in the script
    leading = args[0].lower()
    if leading in (
            '-c', '--c', '-n', '--n', '-nil', '--nil', '-none', '--none',
            '-null', '--null'):
        use_input = False
        expressions = args[1:]
    elif leading in (
            '-jsonl', '--jsonl', '-jsonlines', '--jsonlines', '-json-lines',
            '--json-lines'):
        # enable JSONL mode, and empty the expressions list, to avoid
        # possible compilation errors later
        jsonl = True
        expressions = []

try:
    stdout.reconfigure(newline='\n', encoding='utf-8')

    # compile all expressions to speed them up, since they're all (re)run
    # for each line from standard input; also, handle single-dot formulas
    # as identity expressions, using the current line as is
    expressions = [src if src != '.' else 'line' for src in expressions]
    expressions = [
        compile_py(src, '<string>', 'eval') for src in expressions
    ]
    if not expressions and not jsonl:
        exit(0)

    if jsonl:
        # handle stdin lines as JSONL
        stdin.reconfigure(encoding='utf-8')
        jsonl2json(stdout, stdin)
    elif use_input:
        # handle stdin lines with the formulas given
        stdin.reconfigure(encoding='utf-8')

        # i, n, and nr are globals, so formulas can use them: i is the
        # 0-based index of the current line, while n and nr are 1-based
        for i, line in enumerate(stdin):
            n = nr = i + 1
            run(expressions, line.rstrip('\r\n'))
    else:
        # run formulas once, with no input
        stdin = None
        stdout = None
        run(expressions, '')
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    from sys import stderr
    stderr.flush()
    stderr.close()
except KeyboardInterrupt:
    # quit quietly, instead of showing a confusing error message
    from sys import stderr
    stderr.flush()
    stderr.close()
    exit(2)
except Exception as err:
    # the error isn't named `e`, to avoid rebinding the math constant
    from sys import stderr
    print(f'\x1b[31m{err}\x1b[0m', file=stderr)
    exit(1)