File: awksome.sh

#!/bin/sh

# The MIT License (MIT)
#
# Copyright © 2020-2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# AWKsome
#
# This is a collection of arguably useful shell functions/shortcuts which
# use AWK. Some of these could also be implemented with tools such as `sed`
# and `tr`, but those tools don't accept any number of trailing filenames,
# which makes the AWK-based versions here more convenient to use.
#
# Some tools also use `curl`, since they need data from live outside sources;
# some tools also use `sed`, for convenience.
#
# A few of these explicitly use GNU awk (gawk), mainly due to its support of
# UTF-8: it's better for something to fail overtly (such as with a `gawk not
# installed` error) than to risk subtle bugs (handling multi-byte UTF-8 runes
# as multiple separate text items).


# dash doesn't support regex-matching syntax, so case statements are used here
case "$0" in
    -bash|-dash|-sh|bash|dash|sh)
        # script is being sourced with bash or dash, which is good
        :
        ;;
    *)
        case "$ZSH_EVAL_CONTEXT" in
            *:file)
                # script is being sourced with zsh, which is good
                :
                ;;
            *)
                # script is being run normally, which is a waste of time
                printf "\e[48;2;255;255;135m\e[38;2;0;0;0mDon't run this script, source it instead: to do that,\e[0m\n"
                printf "\e[48;2;255;255;135m\e[38;2;0;0;0mrun 'source awksome' or '. awksome' (no quotes either way).\e[0m\n"
                # failing during shell-startup may deny shell access, so exit
                # with a 0 error-code to declare success
                exit 0
                ;;
        esac
        ;;
esac


# emit each argument given as its own line of output
args() { awk 'BEGIN { for (i = 1; i < ARGC; i++) print ARGV[i]; exit }' "$@"; }

# avoid/ignore lines which match any of the regexes given
avoid() {
    awk '
    BEGIN { for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } }

    {
        for (i = 1; i < ARGC; i++) if ($0 ~ e[i]) next
        print; fflush()
        got++
    }

    END { exit(got == 0) }
    ' "${@:-^\r?$}"
}

# BACKward-SORT (numerically) using values from the columns whose names match
# the arguments given, either exactly, case-insensitively, as a 1-based index,
# or even as negative/backward indices.
#
# Sorting happens by comparing fields in the order given.
90 # 91 # The output is always lines of TSV (tab-separated values) items, even when 92 # the lines from stdin aren't. 93 backsort() { 94 awk ' 95 function findcol(name, lowname, i) { 96 for (i = 1; i <= NF; i++) { 97 if (name == $i) return i 98 } 99 100 for (i = 1; i <= NF; i++) { 101 if (lowname == tolower($i)) return i 102 } 103 104 if (1 <= name && name <= NF) return name + 0 105 if (name < 0 && -name <= NF) return NF + name + 1 106 107 return 0 108 } 109 110 BEGIN { 111 for (i = 1; i < ARGC; i++) { 112 colnames[i] = ARGV[i] 113 lownames[i] = tolower(colnames[i]) 114 delete ARGV[i] 115 } 116 } 117 118 { gsub(/\r$/, "") } 119 120 NR == 1 { 121 if ($0 ~ /\t/) { 122 FS = "\t" 123 $0 = $0 124 } 125 126 width = NF 127 given = length(colnames) 128 129 for (i = 1; i <= given; i++) { 130 j = findcol(colnames[i], lownames[i]) 131 if (j > 0) pos[++numcols] = j 132 133 if (j == 0) { 134 fmt = "\x1b[31mno column match for \"%s\"\x1b[0m\n" 135 printf(fmt, colnames[i]) > "/dev/stderr" 136 errors++ 137 } 138 } 139 140 if (errors > 0) exit 1 141 142 cmd = "sort -t '\t'" 143 for (i = 1; i <= numcols; i++) { 144 cmd = cmd sprintf(" -rnk%d", pos[i]) 145 } 146 147 for (i = 1; i <= width; i++) { 148 if (i > 1) printf "\t" 149 printf("%s", $i) 150 } 151 printf "\n"; fflush() 152 153 next 154 } 155 156 { 157 for (i = 1; i <= width; i++) { 158 if (i > 1) printf "\t" | cmd 159 printf("%s", $i) | cmd 160 } 161 printf "\n" | cmd 162 } 163 ' "$@" 164 } 165 166 # process Blocks/paragraphs of non-empty lines with AWK 167 # bawk() { awk -F='' -v RS='' "$@"; } 168 169 # process Blocks/paragraphs of non-empty lines with AWK 170 bawk() { stdbuf -oL awk -F='' -v RS='' "$@"; } 171 172 # start by joining all arguments given as a tab-separated-items line of output, 173 # followed by all lines from stdin verbatim 174 begintsv() { 175 awk ' 176 BEGIN { 177 for (i = 1; i < ARGC; i++) { 178 if (i > 1) printf "\t" 179 printf "%s", ARGV[i] 180 delete ARGV[i] 181 } 182 if (ARGC > 1) printf "\n" 183 fflush() 184 } 185 { print; fflush() } 186 ' "$@" 187 } 188 189 # Breathe Header: add an empty line after the first one (the header), then 190 # separate groups of 5 lines (by default) with empty lines between them 191 bh() { 192 local n="${1:-5}" 193 [ $# -gt 0 ] && shift 194 awk -v n="$n" ' 195 BEGIN { if (n == 0) n = -1 } 196 (NR - 1) % n == 1 && NR > 1 { print "" } 197 { print; fflush() } 198 ' "$@" 199 } 200 201 # Breathe Header 5: add an empty line after the first one (the header), 202 # then separate groups of 5 lines with empty lines between them 203 bh5() { 204 awk ' 205 (NR - 1) % 5 == 1 && NR > 1 { print "" } 206 { print; fflush() } 207 ' "$@" 208 } 209 210 # Breathe Lines: separate groups of 5 lines (by default) with empty lines 211 bl() { 212 local n="${1:-5}" 213 [ $# -gt 0 ] && shift 214 awk -v n="$n" ' 215 BEGIN { if (n == 0) n = -1 } 216 NR % n == 1 && NR != 1 { print "" } 217 { print; fflush() } 218 ' "$@" 219 } 220 221 # Breathe Lines 5: separate groups of 5 lines with empty lines 222 bl5() { 223 awk ' 224 NR % 5 == 1 && NR != 1 { print "" } 225 { print; fflush() } 226 ' "$@" 227 } 228 229 # process BLocks/paragraphs of non-empty lines with AWK 230 # blawk() { awk -F='' -v RS='' "$@"; } 231 232 # process BLocks/paragraphs of non-empty lines with AWK 233 blawk() { stdbuf -oL awk -F='' -v RS='' "$@"; } 234 235 # Begin-Only Awk 236 boa() { awk "BEGIN { $1; exit }"; } 237 238 # Begin Print Exit 239 bpe() { awk "BEGIN { print $1; exit }"; } 240 241 # split lines using the regex given, turning them into single-item lines 
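#
# A minimal usage sketch (illustrative only): the pattern is handed to awk as
# FPAT, so it effectively describes what each emitted item looks like, rather
# than the separator between items, and it needs an awk with FPAT support,
# such as gawk; e.g.
#
#   printf 'id=42, name=ada\n' | breakdown '[A-Za-z0-9]+'
#
# emits the 4 lines `id`, `42`, `name`, and `ada`.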
242 breakdown() { 243 local sep="${1:- }" 244 [ $# -gt 0 ] && shift 245 awk -v FPAT="${sep}" '{ for (i = 1; i <= NF; i++) print $i; fflush() }' "$@" 246 } 247 248 # separate groups of 5 lines (by default) with empty lines 249 breathe() { 250 local n="${1:-5}" 251 [ $# -gt 0 ] && shift 252 awk -v n="$n" ' 253 BEGIN { if (n == 0) n = -1 } 254 NR % n == 1 && NR != 1 { print "" } 255 { print; fflush() } 256 ' "$@" 257 } 258 259 # Book-like Side-By-Side lays out text lines into several columns, separating 260 # them with a special symbol. This script lets you see more data at once, as 261 # monitors are wider than tall and most text content has fairly short lines. 262 # 263 # If a column-count isn't given, it's 2 by default, just like with books. If 264 # no named inputs are given, lines are read from the standard input. 265 bsbs() { 266 local num_columns 267 local height 268 local failed 269 local arg 270 271 if [ $# -eq 0 ] && [ ! -p /dev/stdin ]; then 272 awk '/^# +bsbs /, /^$/ { gsub(/^# ?/, ""); print }' "$0" 273 printf "\e[32mno files given and stdin not being piped into\e[0m\n" 274 return 0 275 fi 276 277 num_columns=2 278 if [ "$(echo "$1" | grep -E '^[+-]?[0-9]+$' 2> /dev/null)" ]; then 279 num_columns="$1" 280 shift 281 fi 282 283 if [ "${num_columns}" -lt 1 ]; then 284 num_columns=1 285 fi 286 287 # use the current screen height 288 height="$(tput lines)" 289 290 if [ "${height}" -lt 2 ]; then 291 printf "\e[31mscreen/window is too short to show content\e[0m\n" >&2 292 return 1 293 fi 294 295 # show all non-existing files given 296 failed=0 297 for arg in "$@"; do 298 if [ "${arg}" = "-" ]; then 299 continue 300 fi 301 if [ ! -e "${arg}" ]; then 302 printf "\e[31mno file named \"%s\"\e[0m\n" "${arg}" > /dev/stderr 303 failed=1 304 fi 305 done 306 307 # in case of errors, avoid showing an empty screen 308 if [ "${failed}" -gt 0 ]; then 309 return 1 310 fi 311 312 # allow loading lines from multiple files, ensuring no lines are accidentally 313 # joined across inputs 314 awk 1 "$@" | 315 316 # ignore leading UTF-8 BOMs (byte-order marks) and trailing carriage-returns: 317 # the latter in particular will ruin side-by-side output 318 sed 's-^\xef\xbb\xbf--; s-\r$--' | 319 320 # before laying out lines side-by-side, expand all tabs 321 expand -t 4 | 322 323 # lay things side-by-side, like pages/faces in a book 324 awk -v num_cols="${num_columns}" -v height="${height}" ' 325 BEGIN { 326 inner_rows = height - 2 327 } 328 329 # remember all lines; assumes carriage-returns are already removed 330 { 331 p = NR - 1 332 lines[p] = $0 333 gsub(/\x1b\[[0-9;]*[A-Za-z]/, "") 334 widths[p] = length($0) 335 } 336 337 # round up non-integers 338 function ceil(n) { 339 return (n % 1) ? 
n - (n % 1) + 1 : n 340 } 341 342 END { 343 # if a single column is enough for all lines, just do it that way 344 if (NR <= inner_rows) { 345 for (i = 0; i < NR; i++) print lines[i] 346 exit 347 } 348 349 # avoid empty trailing columns 350 if (NR < inner_rows * num_cols) num_cols = ceil(NR / inner_rows) 351 # ensure number of columns is valid 352 if (num_cols < 1) num_cols = 1 353 354 for (i = 0; i < NR; i += inner_rows * num_cols) { 355 for (j = 0; j < inner_rows; j++) { 356 for (k = 0; k < num_cols; k++) { 357 w = widths[i + k * inner_rows + j] 358 if (max_widths[k] < w) max_widths[k] = w 359 } 360 } 361 } 362 363 widest = 0 364 for (i in max_widths) { 365 if (widest < max_widths[i]) widest = max_widths[i] 366 } 367 368 total_max_width = 0 369 for (i = 0; i < num_cols; i++) { 370 total_max_width += max_widths[i] 371 } 372 # also count separators, which are 3-items wide 373 if (num_cols > 0) total_max_width += 3 * (num_cols - 1) 374 375 # make separator wide enough to match the length of any output line 376 bottom_sep = "································" 377 for (nsep = 32; nsep < total_max_width; nsep *= 2) { 378 bottom_sep = bottom_sep bottom_sep 379 } 380 # separator is used directly, so match the needed width exactly 381 bottom_sep = substr(bottom_sep, 1, total_max_width) 382 383 # emit lines side by side 384 for (i = 0; i < NR; i += inner_rows * num_cols) { 385 # emit a page-bottom/separator line between pages of columns 386 if (i > 0) print bottom_sep 387 388 for (j = 0; j < inner_rows; j++) { 389 # bottom-pad last page-pair with empty lines, so page-scroll 390 # on viewers like `less` stays in sync with the page bottoms 391 if (NR - i - j <= 0) { 392 print "" 393 continue 394 } 395 396 for (k = 0; k < num_cols; k++) { 397 p = i + k * inner_rows + j 398 l = lines[p] 399 400 if (k > 0) { 401 printf "\x1b[0m █" 402 if (k != num_cols - 1 || l != "") printf " " 403 } 404 405 printf "%s", l 406 407 if (k < num_cols - 1) { 408 pad = max_widths[k] - widths[p] 409 if (pad > 0) printf "%*s", pad, "" 410 } 411 } 412 413 print "\x1b[0m" 414 } 415 } 416 417 # end last page with an empty line, instead of the usual page-sep 418 if (NR > 0) print "" 419 } 420 ' | 421 422 # view the result interactively 423 less -JMKiCRS 424 } 425 426 # show a reverse-sorted tally of all lines read, where ties are sorted 427 # alphabetically, and where trailing bullets are added to quickly make 428 # the tally counts comparable at a glance 429 bully() { 430 awk -v sortcmd="sort -t \"$(printf '\t')\" -rnk2 -k1d" ' 431 # reassure users by instantly showing the header 432 BEGIN { print "value\ttally\tbullets"; fflush() } 433 434 { gsub(/\r$/, ""); tally[$0]++ } 435 436 END { 437 bullet = "•" 438 439 # find the max tally, which is needed to build the bullets-string 440 max = 0 441 for (k in tally) if (max < tally[k]) max = tally[k] 442 443 # make enough bullets for all tallies: this loop makes growing the 444 # string a task with complexity O(n * log n), instead of a naive 445 # O(n**2), which can slow-down things when tallies are high enough 446 bullets = bullet 447 for (n = max; n > 1; n /= 2) bullets = bullets bullets 448 449 # emit unsorted output lines to the sort cmd, which will emit the 450 # final reverse-sorted tally lines 451 for (k in tally) { 452 t = tally[k] 453 s = (t == 1) ? 
bullet : substr(bullets, 1, t) 454 printf "%s\t%d\t%s\n", k, t, s | sortcmd 455 } 456 } 457 ' "$@" 458 } 459 460 # uppercase the first letter on each line, and lowercase all later letters 461 capitalize() { 462 awk '{ print; fflush() }' "$@" | sed -E -u 's-^(.*)-\L\1-; s-^(.)-\u\1-' 463 } 464 465 # conCATenate Lines guarantees no lines are ever accidentally joined 466 # across inputs, always emitting a line-feed at the end of every line 467 catl() { awk '{ print; fflush() }' "$@"; } 468 469 # Count with AWK: count the times the AWK expression/condition given is true 470 cawk() { 471 local cond="${1:-1}" 472 [ $# -gt 0 ] && shift 473 awk " 474 { low = lower = tolower(\$0) } 475 ${cond} { count++ } 476 END { print count } 477 " "$@" 478 } 479 480 # center-align lines of text, using the current screen width 481 center() { 482 gawk -v width="$(tput cols)" ' 483 { 484 gsub(/\r$/, "") 485 lines[NR] = $0 486 s = $0 487 gsub(/\x1b\[[0-9;]*[A-Za-z]/, "", s) # ANSI style-changers 488 l = length(s) 489 if (maxlen < l) maxlen = l 490 } 491 492 END { 493 n = (width - maxlen) / 2 494 if (n % 1) n = n - (n % 1) 495 fmt = sprintf("%%%ds%%s\n", (n > 0) ? n : 0) 496 for (i = 1; i <= NR; i++) printf fmt, "", lines[i] 497 } 498 ' "$@" 499 } 500 501 # ignore final life-feed from text, if it's the very last byte; also ignore 502 # all trailing carriage-returns 503 choplf() { 504 awk ' 505 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 506 NR > 1 { print ""; fflush() } 507 { gsub(/\r$/, ""); printf "%s", $0; fflush() } 508 ' "$@" 509 } 510 511 # COunt COndition: count how many times the AWK expression given is true 512 coco() { 513 local cond="${1:-1}" 514 [ $# -gt 0 ] && shift 515 awk " 516 { low = lower = tolower(\$0) } 517 ${cond} { count++ } 518 END { print count } 519 " "$@" 520 } 521 522 # split lines using the string given, turning them into single-item lines 523 crumble() { 524 local sep="${1:- }" 525 [ $# -gt 0 ] && shift 526 awk -F "${sep}" '{ for (i = 1; i <= NF; i++) print $i; fflush() }' "$@" 527 } 528 529 # DECAPitate (lines) emits the first line as is, piping all lines after that 530 # to the command given, passing all/any arguments/options to it 531 # decap() { 532 # awk -v cmd="$*" 'NR == 1 { print; fflush() } NR > 1 { print | cmd }' 533 # } 534 535 # ignore whole-comment lines, or just trailing unix-style comments in them 536 decomment() { 537 awk '/^ *#/ { next } { gsub(/ *#.*$/, ""); print; fflush(); }' "$@" 538 } 539 540 # DEDUPlicate prevents lines from appearing more than once 541 dedup() { awk '!c[$0]++ { print; fflush() }' "$@"; } 542 543 # dictionary-define the word given, using an online service 544 define() { 545 local arg 546 local gap=0 547 local options='-JMKiCRS' 548 549 if [ $# -eq 0 ]; then 550 printf "\e[38;2;204;0;0mdefine: no names given\e[0m\n" >&2 551 return 1 552 fi 553 554 if [ $# -eq 1 ]; then 555 options='--header=1 -JMKiCRS' 556 fi 557 558 for arg in "$@"; do 559 [ "${gap}" -gt 0 ] && printf "\n" 560 gap=1 561 printf "\e[7m%-80s\x1b[0m\n" "${arg}" 562 curl -s "dict://dict.org/d:${arg}" | awk ' 563 { gsub(/\r$/, "") } 564 /^151 / { 565 printf "\x1b[38;2;52;101;164m%s\x1b[0m\n", $0; fflush() 566 next 567 } 568 /^[1-9][0-9]{2} / { 569 printf "\x1b[38;2;128;128;128m%s\x1b[0m\n", $0; fflush() 570 next 571 } 572 { print; fflush() } 573 ' 574 done | less "${options}" 575 } 576 577 # convert lines of Space(s)-Separated Values into lines of tab-separated values 578 dessv() { 579 awk ' 580 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 581 582 { 583 gsub(/\r$/, "") 584 for (i = 1; i <= NF; i++) 
{ 585 if (i > 1) printf "\t" 586 printf "%s", $i 587 } 588 printf "\n"; fflush() 589 } 590 ' "$@" 591 } 592 593 # ignore trailing spaces, as well as trailing carriage returns 594 detrail() { awk '{ gsub(/ *\r?$/, ""); print; fflush() }' "$@"; } 595 596 # DIVide 2 numbers 3 ways, including the complement 597 div() { 598 awk -v a="${1:-1}" -v b="${2:-1}" ' 599 BEGIN { 600 gsub(/_/, "", a) 601 gsub(/_/, "", b) 602 if (a > b) { c = a; a = b; b = c } 603 c = 1 - a / b 604 if (0 <= c && c <= 1) printf "%f\n%f\n%f\n", a / b, b / a, c 605 else printf "%f\n%f\n", a / b, b / a 606 exit 607 }' 608 } 609 610 # ignore/remove all matched regexes given on all stdin lines 611 drop() { 612 awk ' 613 BEGIN { for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } } 614 { 615 for (i = 1; i < ARGC; i++) gsub(e[i], "") 616 print; fflush() 617 } 618 ' "${@:-\r$}" 619 } 620 621 # Drop Tab Separated-Values, ignoring all columns given, and keeping all the 622 # others. The column names given are matched using the first line from stdin. 623 # Matching works by first trying exact matching, then by case-insensitive 624 # matching, when there's no exact match, finally giving up when there's no 625 # match either way. 626 dtsv() { 627 awk -F "\t" ' 628 BEGIN { 629 for (i = 1; i < ARGC; i++) { 630 colnames[++n] = ARGV[i] 631 lownames[n] = tolower(ARGV[i]) 632 delete ARGV[i] 633 } 634 } 635 636 function findcol(name, lowname, i) { 637 for (i = 1; i <= NF; i++) if (name == $i) return i 638 for (i = 1; i <= NF; i++) if (lowname == tolower($i)) return i 639 640 if (1 <= name && name <= NF) return name + 0 641 if (name < 0 && -name <= NF) return NF + name + 1 642 643 return 0 644 } 645 646 { gsub(/\r$/, "") } 647 648 NR == 1 { 649 numcols = NF 650 for (i = 1; i <= numcols; i++) keep[i] = 1 651 652 colsused = numcols 653 for (i = 1; i <= numcols; i++) { 654 j = findcol(colnames[i], lownames[i]) 655 if (j > 0) { 656 keep[j] = 0 657 colsused-- 658 } 659 660 if (j == 0) { 661 fmt = "\x1b[31mno column match for \"%s\"\x1b[0m\n" 662 printf(fmt, colnames[i]) > "/dev/stderr" 663 errors++ 664 } 665 } 666 667 if (errors > 0) exit 1 668 if (colsused == 0) exit 669 } 670 671 { 672 c = 0 673 for (i = 1; i <= numcols; i++) { 674 if (keep[i] == 1) { 675 if (c > 0) printf "\t" 676 printf "%s", $i 677 c++ 678 } 679 } 680 681 printf "\n"; fflush() 682 } 683 ' "$@" 684 } 685 686 # European Central Bank Latest Exchange Rates, as tab-separated values 687 ecbler() { 688 # load data from the european central bank website 689 wget -q -O - 'https://www.ecb.europa.eu/stats/eurofxref/eurofxref.zip' | 690 # decompress payload 691 gzip -d | 692 # turn CSV into TSV, ignoring trailing empty fields 693 sed -E 's- *\r$--; s-,$--; s-, *-\t-g; s-\t+$--' | 694 # reciprocate dividers into multipliers 695 awk -F "\t" ' 696 NR == 1 { print } 697 NR == 2 { 698 printf "%s", $1 699 for (i = 2; i <= NF; i++) printf("\t%.6f", 1.0 / $i) 700 printf "\n" 701 } 702 ' | 703 # pick a subset of the columns 704 awk -F "\t" -v OFS="\t" \ 705 '{ print $1, $16, $17, $18, $19, $23, $25, $7, $2 }' | 706 # turn dates into the yyyy/mm/dd format 707 sed -E \ 708 -e 's-([0-9]+) ([A-Za-z]{3})[a-z]+ ([0-9]+)-\3/\L\2/\1-g' \ 709 -e 's-jan-01-g' -e 's-feb-02-g' -e 's-mar-03-g' \ 710 -e 's-apr-04-g' -e 's-may-05-g' -e 's-jun-06-g' \ 711 -e 's-jul-07-g' -e 's-aug-08-g' -e 's-sep-09-g' \ 712 -e 's-oct-10-g' -e 's-nov-11-g' -e 's-dec-12-g' 713 } 714 715 # fix lines, ignoring leading UTF-8_BOMs (byte-order-marks) on each input's 716 # first line, turning all end-of-line CRLF byte-pairs 
into single line-feeds, 717 # and ensuring each input's last line ends with a line-feed; trailing spaces 718 # are also ignored 719 fixlines() { 720 awk ' 721 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 722 { gsub(/ *\r?$/, ""); print; fflush() } 723 ' "$@" 724 } 725 726 # convert FeeT into meters 727 ft() { 728 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 729 awk '/./ { printf "%.2f\n", 0.3048 * $0; fflush() }' 730 } 731 732 # convert FeeT² (squared) into meters² 733 ft2() { 734 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 735 awk '/./ { printf "%.2f\n", 0.09290304 * $0 }' 736 } 737 738 # convert a mix of FeeT and INches into meters 739 ftin() { 740 local ft="${1:-0}" 741 ft="$(echo "${ft}" | sed 's-_--g')" 742 local in="${2:-0}" 743 in="$(echo "${in}" | sed 's-_--g')" 744 awk "BEGIN { print 0.3048 * ${ft} + 0.0254 * ${in}; exit }" 745 } 746 747 # convert GALlons into liters 748 gal() { 749 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 750 awk '/./ { printf "%.2f\n", 3.785411784 * $0; fflush() }' 751 } 752 753 # convert binary GigaBytes into bytes 754 gb() { 755 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 756 awk '/./ { printf "%.4f\n", 1073741824 * $0; fflush() }' | 757 sed 's-\.00*$--' 758 } 759 760 # Good, Bad, Meh colors lines using up to 3 regular expressions, keeping all 761 # other input lines verbatim 762 gbm() { 763 local good="$1" 764 local bad="$2" 765 local meh="$3" 766 [ $# -gt 0 ] && shift 767 [ $# -gt 0 ] && shift 768 [ $# -gt 0 ] && shift 769 770 awk ' 771 BEGIN { 772 gotgood = ARGC > 1 && ARGV[1] != "" 773 gotbad = ARGC > 2 && ARGV[2] != "" 774 gotmeh = ARGC > 3 && ARGV[3] != "" 775 good = ARGV[1] 776 bad = ARGV[2] 777 meh = ARGV[3] 778 delete ARGV[1] 779 delete ARGV[2] 780 delete ARGV[3] 781 } 782 783 gotgood && $0 ~ good { 784 # code to use a color-blind-friendlier blue, instead of green 785 # gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;0;95;215m") 786 # printf "\x1b[38;2;0;95;215m%s\x1b[0m\n", $0 787 gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;0;135;95m") 788 printf "\x1b[38;2;0;135;95m%s\x1b[0m\n", $0; fflush() 789 next 790 } 791 792 gotbad && $0 ~ bad { 793 gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;204;0;0m") 794 printf "\x1b[38;2;204;0;0m%s\x1b[0m\n", $0; fflush() 795 next 796 } 797 798 gotmeh && $0 ~ meh { 799 gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;168;168;168m") 800 printf "\x1b[38;2;168;168;168m%s\x1b[0m\n", $0; fflush() 801 next 802 } 803 804 { print; fflush() } 805 ' "${good}" "${bad}" "${meh}" "$@" 806 } 807 808 # glue/stick together various lines, only emitting a line-feed at the end; an 809 # optional argument is the output-item-separator, which is empty by default 810 glue() { 811 local sep="${1:-}" 812 [ $# -gt 0 ] && shift 813 awk -v sep="${sep}" ' 814 NR > 1 { printf "%s", sep } 815 { gsub(/\r/, ""); printf "%s", $0; fflush() } 816 END { if (NR > 0) print ""; fflush() } 817 ' "$@" 818 } 819 820 # GRoup via AWK groups lines using common results of the AWK expression given 821 grawk() { 822 local code="${1:-\$0}" 823 [ $# -gt 0 ] && shift 824 825 awk ' 826 { low = lower = tolower($0) } 827 828 { 829 k = '"${code}"' 830 if (!(k in groups)) ordkeys[++okl] = k 831 groups[k][length(groups[k]) + 1] = $0 832 } 833 834 END { 835 for (i = 1; i <= okl; i++) { 836 k = ordkeys[i] 837 n = length(groups[k]) 838 for (j = 1; j <= n; j++) print groups[k][j] 839 } 840 } 841 ' "$@" 842 } 843 844 # Style lines using a GRAY-colored BACKground 845 grayback() { 846 awk ' 847 { 848 gsub(/\x1b\[0m/, "\x1b[0m\x1b[48;2;218;218;218m") 849 printf "\x1b[48;2;218;218;218m%s\x1b[0m\n", $0; fflush() 850 } 851 ' "$@" 
852 } 853 854 # Global extended regex SUBstitute, using the AWK function of the same name: 855 # arguments are used as regex/replacement pairs, in that order 856 gsub() { 857 awk ' 858 BEGIN { 859 for (i = 1; i < ARGC; i++) { 860 args[++n] = ARGV[i] 861 delete ARGV[i] 862 } 863 } 864 { 865 for (i = 1; i <= n; i += 2) gsub(args[i], args[i + 1]) 866 print; fflush() 867 } 868 ' "$@" 869 } 870 871 # Highlight (lines) with AWK 872 hawk() { 873 local cond="${1:-1}" 874 [ $# -gt 0 ] && shift 875 awk ' 876 { low = lower = tolower($0) } 877 '"${cond}"' { 878 gsub(/\x1b\[0m/, "\x1b[0m\x1b[7m") 879 printf "\x1b[7m%s\x1b[0m\n", $0; fflush() 880 next 881 } 882 { print; fflush() } 883 ' "$@" 884 } 885 886 # show each byte as a pair of HEXadecimal (base-16) symbols 887 hexify() { 888 cat "$@" | od -x -A n | 889 awk '{ gsub(/ +/, ""); printf "%s", $0; fflush() } END { printf "\n" }' 890 } 891 892 # highlight lines 893 highlight() { 894 awk ' 895 { 896 gsub(/\x1b\[0m/, "\x1b[0m\x1b[7m") 897 printf "\x1b[7m%s\x1b[0m\n", $0; fflush() 898 } 899 ' "$@" 900 } 901 902 # HIghlight LEAK emits/tees input both to stdout and stderr, highlighting what 903 # it emits to stderr using an ANSI-style; this cmd is useful to `debug` pipes 904 # involving several steps 905 hileak() { 906 awk ' 907 { 908 gsub(/\x1b\[[0-9;]*[A-Za-z]/, "") 909 printf "\x1b[7m%s\x1b[0m\n", $0 > "/dev/stderr" 910 print; fflush() 911 } 912 ' "$@" 913 } 914 915 # convert seconds into a colon-separated Hours-Minutes-Seconds triple 916 hms() { 917 echo "${@:-0}" | sed -E 's-_--g; s- +-\n-g' | awk '/./ { 918 x = $0 919 h = (x - x % 3600) / 3600 920 m = (x % 3600) / 60 921 s = x % 60 922 printf "%02d:%02d:%05.2f\n", h, m, s; fflush() 923 }' 924 } 925 926 # find all hyperlinks inside HREF attributes in the input text 927 href() { 928 awk ' 929 BEGIN { e = "href=\"[^\"]+\"" } 930 { 931 for (s = $0; match(s, e); s = substr(s, RSTART + RLENGTH)) { 932 print substr(s, RSTART + 6, RLENGTH - 7); fflush() 933 } 934 } 935 ' "$@" 936 } 937 938 # Index all lines starting from 0, using a tab right after each line number 939 i() { awk '{ printf "%d\t%s\n", NR - 1, $0; fflush() }' "$@"; } 940 941 # avoid/ignore lines which case-insensitively match any of the regexes given 942 iavoid() { 943 gawk ' 944 BEGIN { 945 IGNORECASE = 1 946 for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } 947 } 948 949 { 950 for (i = 1; i < ARGC; i++) if ($0 ~ e[i]) next 951 print; fflush(); got++ 952 } 953 954 END { exit(got == 0) } 955 ' "${@:-^\r?$}" 956 } 957 958 # case-Insensitively DEDUPlicate prevents lines from appearing more than once 959 idedup() { awk '!c[tolower($0)]++ { print; fflush() }' "$@"; } 960 961 # ignore/remove all case-insensitively matched regexes given on all stdin lines 962 idrop() { 963 gawk ' 964 BEGIN { 965 IGNORECASE = 1 966 for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } 967 } 968 969 { 970 for (i = 1; i < ARGC; i++) gsub(e[i], "") 971 print; fflush() 972 } 973 ' "${@:-\r$}" 974 } 975 976 # only keep lines which case-insensitively match any of the regexes given 977 imatch() { 978 gawk ' 979 BEGIN { 980 IGNORECASE = 1 981 for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } 982 } 983 984 { 985 for (i = 1; i < ARGC; i++) { 986 if ($0 ~ e[i]) { 987 print; fflush() 988 got++ 989 next 990 } 991 } 992 } 993 994 END { exit(got == 0) } 995 ' "${@:-[^\r]}" 996 } 997 998 # start each non-empty line with extra n spaces 999 indent() { 1000 awk ' 1001 BEGIN { 1002 n = ARGV[1] + 0 1003 delete ARGV[1] 1004 fmt = sprintf("%%%ds%%s\n", (n > 0) 
? n : 0) 1005 } 1006 1007 /^\r?$/ { print ""; fflush(); next } 1008 { gsub(/\r$/, ""); printf(fmt, "", $0); fflush() } 1009 ' "$@" 1010 } 1011 1012 # emit each word-like item from each input line on its own line; when a file 1013 # has tabs on its first line, items are split using tabs alone, which allows 1014 # items to have spaces in them 1015 items() { 1016 awk ' 1017 FNR == 1 { FS = ($0 ~ /\t/) ? "\t" : " "; $0 = $0 } 1018 { gsub(/\r$/, ""); for (i = 1; i <= NF; i++) print $i; fflush() } 1019 ' "$@" 1020 } 1021 1022 # case-insensitively deduplicate lines, keeping them in their original order: 1023 # the checking/matching is case-insensitive, but each first match is output 1024 # exactly as is 1025 iunique() { awk '!c[tolower($0)]++ { print; fflush() }' "$@"; } 1026 1027 # Judge with AWK colors lines green/red/gray, using up to 3 AWK conditions 1028 jawk() { 1029 local good="${1:-0}" 1030 local bad="${2:-0}" 1031 local meh="${3:-0}" 1032 1033 [ $# -gt 0 ] && shift 1034 [ $# -gt 0 ] && shift 1035 [ $# -gt 0 ] && shift 1036 1037 awk ' 1038 { low = lower = tolower($0) } 1039 1040 '"${good}"' { 1041 # code to use a color-blind-friendlier blue, instead of green 1042 # gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;0;95;215m") 1043 # printf "\x1b[38;2;0;95;215m%s\x1b[0m\n", $0; fflush() 1044 1045 gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;0;135;95m") 1046 printf "\x1b[38;2;0;135;95m%s\x1b[0m\n", $0; fflush() 1047 next 1048 } 1049 1050 '"${bad}"' { 1051 gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;204;0;0m") 1052 printf "\x1b[38;2;204;0;0m%s\x1b[0m\n", $0; fflush() 1053 next 1054 } 1055 1056 '"${meh}"' { 1057 gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;168;168;168m") 1058 printf "\x1b[38;2;168;168;168m%s\x1b[0m\n", $0; fflush() 1059 next 1060 } 1061 1062 { print; fflush() } 1063 ' "$@" 1064 } 1065 1066 # show a `dad` JOKE from the web, sometimes even a very funny one 1067 joke() { 1068 curl --show-error -s https://icanhazdadjoke.com | fold -s | 1069 awk '{ gsub(/ *\r?$/, ""); print }' 1070 } 1071 1072 # convert binary KiloBytes into bytes 1073 kb() { 1074 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 1075 awk '/./ { printf "%.2f\n", 1024 * $0; fflush() }' | 1076 sed 's-\.00*$--' 1077 } 1078 1079 # Line xARGS: `xargs` using line separators, which handles filepaths 1080 # with spaces, as long as the standard input has 1 path per line 1081 largs() { 1082 awk -v ORS='\000' ' 1083 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1084 { gsub(/\r$/, ""); print; fflush() } 1085 ' | xargs -0 "$@" 1086 } 1087 1088 # convert pounds (LB) into kilograms 1089 lb() { 1090 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 1091 awk '/./ { printf "%.2f\n", 0.45359237 * $0; fflush() }' 1092 } 1093 1094 # convert a mix of pounds (LB) and weight-ounces (OZ) into kilograms 1095 lboz() { 1096 local lb="${1:-0}" 1097 lb="$(echo "${lb}" | sed 's-_--g')" 1098 local oz="${2:-0}" 1099 oz="$(echo "${oz}" | sed 's-_--g')" 1100 awk "BEGIN { print 0.45359237 * ${lb} + 0.028349523 * ${oz}; exit }" 1101 } 1102 1103 # ensure lines are never accidentally joined across files, by always emitting 1104 # a line-feed at the end of each line 1105 lines() { awk '{ print; fflush() }' "$@"; } 1106 1107 # regroup adjacent lines into n-item tab-separated lines 1108 lineup() { 1109 local n="${1:-0}" 1110 [ $# -gt 0 ] && shift 1111 1112 if [ "$n" -le 0 ]; then 1113 awk ' 1114 NR > 1 { printf "\t" } 1115 { printf "%s", $0; fflush() } 1116 END { if (NR > 0) print "" } 1117 ' "$@" 1118 return $? 
1119 fi 1120 1121 awk -v n="$n" ' 1122 NR % n != 1 && n > 1 { printf "\t" } 1123 { printf "%s", $0; fflush() } 1124 NR % n == 0 { print ""; fflush() } 1125 END { if (NR % n != 0) print "" } 1126 ' "$@" 1127 } 1128 1129 # LOwercase line, check (awk) COndition: on each success, the original line 1130 # is output with its original letter-casing, as its lower-cased version is 1131 # only a convenience meant for the condition 1132 loco() { 1133 local cond="${1:-1}" 1134 [ $# -gt 0 ] && shift 1135 awk " 1136 { 1137 line = orig = original = \$0 1138 low = lower = tolower(\$0) 1139 \$0 = lower 1140 } 1141 ${cond} { print line; fflush() } 1142 " "$@" 1143 } 1144 1145 # LOWercase all ASCII symbols 1146 low() { awk '{ print tolower($0); fflush() }' "$@"; } 1147 1148 # LOWERcase all ASCII symbols 1149 lower() { awk '{ print tolower($0); fflush() }' "$@"; } 1150 1151 # only keep lines which match any of the regexes given 1152 match() { 1153 awk ' 1154 BEGIN { for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } } 1155 1156 { 1157 for (i = 1; i < ARGC; i++) { 1158 if ($0 ~ e[i]) { 1159 print; fflush() 1160 got++ 1161 next 1162 } 1163 } 1164 } 1165 1166 END { exit(got == 0) } 1167 ' "${@:-[^\r]}" 1168 } 1169 1170 # MAX Width truncates lines up to the given number of items/bytes given, or up 1171 # to 80 by default; output lines end with an ANSI reset-code, in case input 1172 # lines use ANSI styles 1173 maxw() { 1174 local maxwidth="${1:-80}" 1175 [ $# -gt 0 ] && shift 1176 awk -v maxw="${maxwidth}" ' 1177 { 1178 gsub(/\r$/, "") 1179 printf("%s\x1b[0m\n", substr($0, 1, maxw)); fflush() 1180 } 1181 ' "$@" 1182 } 1183 1184 # convert binary MegaBytes into bytes 1185 mb() { 1186 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 1187 awk '/./ { printf "%.2f\n", 1048576 * $0; fflush() }' | 1188 sed 's-\.00*$--' 1189 } 1190 1191 # convert MIles into kilometers 1192 mi() { 1193 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 1194 awk '/./ { printf "%.2f\n", 1.609344 * $0; fflush() }' 1195 } 1196 1197 # convert MIles² (squared) into kilometers² 1198 mi2() { 1199 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 1200 awk '/./ { printf "%.2f\n", 2.5899881103360 * $0 }' 1201 } 1202 1203 # MINimize DECimalS ignores all trailing decimal zeros in numbers, and even 1204 # the decimal dots themselves, when decimals in a number are all zeros 1205 mindecs() { 1206 awk '{ gsub(/\r$/, ""); print; fflush() }' "$@" | 1207 sed -u -E 's-([0-9]+)\.0+-\1-g; s-([0-9]+\.[0-9]*[1-9])0+-\1-g' 1208 } 1209 1210 # convert Miles Per Hour into kilometers per hour 1211 mph() { 1212 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 1213 awk '/./ { printf "%.2f\n", 1.609344 * $0 }' 1214 } 1215 1216 # Number all lines, using a tab right after each line number 1217 n() { awk '{ printf "%d\t%s\n", NR, $0; fflush() }' "$@"; } 1218 1219 # convert Nautical MIles into kilometers 1220 nmi() { 1221 echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | 1222 awk '/./ { printf "%.2f\n", 1.852 * $0; fflush() }' 1223 } 1224 1225 # Nice Ps shows/lists all current processes shown by `ps` 1226 np() { 1227 local res 1228 local code 1229 # res="$(ps "${@:-auxf}")" 1230 res="$(ps "${@:-aux}")" 1231 code=$? 
1232 if [ "${code}" -ne 0 ]; then 1233 return "${code}" 1234 fi 1235 1236 echo "${res}" | awk ' 1237 BEGIN { 1238 d = strftime("%a %b %d") 1239 t = strftime("%H:%M:%S") 1240 # printf "\x1b[7m%30s%s %s%30s\x1b[0m\n\n", "", d, t, "" 1241 fmt = "\x1b[38;2;128;128;128m\x1b[7m%30s%s %s%30s\x1b[0m\n\n" 1242 printf fmt, "", d, t, "" 1243 } 1244 1245 (NR - 1) % 5 == 1 && NR > 1 { print "" } 1246 1247 $1 == "root" { 1248 gsub(/^/, "\x1b[38;2;52;101;164m") 1249 gsub(/ +/, "&\x1b[0m\x1b[38;2;52;101;164m") 1250 gsub(/$/, "\x1b[0m") 1251 } 1252 1253 { 1254 gsub(/ \? /, "\x1b[38;2;135;135;175m&\x1b[0m") 1255 gsub(/0\.0/, "\x1b[38;2;135;135;175m&\x1b[0m") 1256 gsub(/0:00/, "\x1b[38;2;135;135;175m&\x1b[0m") 1257 printf "%3d %s\n", NR - 1, $0 1258 } 1259 ' | less -JMKiCRS 1260 } 1261 1262 # NULl-terminate LINES ends each stdin line with a null byte, instead of a 1263 # line-feed byte 1264 nullines() { 1265 awk -v ORS='\000' ' 1266 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1267 { gsub(/\r$/, ""); print; fflush() } 1268 ' "$@" 1269 } 1270 1271 # Print Awk expression 1272 pa() { awk "BEGIN { print ${1:-0}; exit }"; } 1273 1274 # Paragraph AWK runs `awk` in block/paragraph/multiline input-mode 1275 pawk() { stdbuf -oL awk -F='' -v RS='' "$@"; } 1276 1277 # pick lines, using all the 1-based line-numbers given 1278 picklines() { 1279 awk ' 1280 BEGIN { m = ARGC - 1; if (ARGC == 1) exit 0 } 1281 BEGIN { for (i = 1; i <= m; i++) { p[i] = ARGV[i]; delete ARGV[i] } } 1282 { l[++n] = $0 } 1283 END { 1284 for (i = 1; i <= m; i++) { 1285 j = p[i] 1286 if (j < 0) j += NR + 1 1287 if (0 < j && j <= NR) print l[j] 1288 } 1289 } 1290 ' "$@" 1291 } 1292 1293 # make text plain, by ignoring ANSI terminal styling 1294 plain() { awk '{ gsub(/\x1b\[[0-9;]*[A-Za-z]/, ""); print; fflush() }' "$@"; } 1295 1296 # PREcede (input) MEMO, prepends a first highlighted line to stdin lines 1297 prememo() { 1298 awk ' 1299 BEGIN { 1300 if (ARGC > 1) printf "\x1b[7m" 1301 for (i = 1; i < ARGC; i++) { 1302 if (i > 1) printf " " 1303 printf "%s", ARGV[i] 1304 delete ARGV[i] 1305 } 1306 if (ARGC > 1) printf "\x1b[0m\n" 1307 fflush() 1308 } 1309 { print; fflush() } 1310 ' "$@" 1311 } 1312 1313 # start by joining all arguments given as a tab-separated-items line of output, 1314 # followed by all lines from stdin verbatim 1315 pretsv() { 1316 awk ' 1317 BEGIN { 1318 for (i = 1; i < ARGC; i++) { 1319 if (i > 1) printf "\t" 1320 printf "%s", ARGV[i] 1321 delete ARGV[i] 1322 } 1323 if (ARGC > 1) printf "\n" 1324 fflush() 1325 } 1326 { print; fflush() } 1327 ' "$@" 1328 } 1329 1330 # show/list all current processes 1331 processes() { 1332 local res 1333 res="$(ps aux)" 1334 echo "${res}" | awk '!/ps aux$/' | sed -E \ 1335 -e 's- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1' \ 1336 -e 's- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1' 1337 } 1338 1339 # Pick Tab Separated-Values, trying to match the column names given using the 1340 # first line from stdin. Matching works by first trying exact matching, then 1341 # case-insensitive matching, when there's no exact match, finally giving up 1342 # when there's no match either way. 
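#
# A minimal usage sketch (illustrative only; the column names and values are
# made up):
#
#   printf 'name\tage\nada\t36\n' | ptsv age name
#
# emits the header `age<TAB>name`, followed by the data line `36<TAB>ada`,
# picking and reordering columns by their header names.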
1343 ptsv() { 1344 awk -F "\t" ' 1345 BEGIN { 1346 for (i = 1; i < ARGC; i++) { 1347 colnames[++n] = ARGV[i] 1348 lownames[n] = tolower(ARGV[i]) 1349 delete ARGV[i] 1350 } 1351 } 1352 1353 function findcol(name, lowname, i) { 1354 for (i = 1; i <= NF; i++) if (name == $i) return i 1355 for (i = 1; i <= NF; i++) if (lowname == tolower($i)) return i 1356 1357 if (1 <= name && name <= NF) return name + 0 1358 if (name < 0 && -name <= NF) return NF + name + 1 1359 1360 return 0 1361 } 1362 1363 { gsub(/\r$/, "") } 1364 1365 NR == 1 { 1366 for (i = 1; i <= NF; i++) { 1367 j = findcol(colnames[i], lownames[i]) 1368 if (j > 0) pick[++numcols] = j 1369 1370 if (j == 0) { 1371 fmt = "\x1b[31mno column match for \"%s\"\x1b[0m\n" 1372 printf(fmt, colnames[i]) > "/dev/stderr" 1373 errors++ 1374 } 1375 } 1376 1377 if (errors > 0) exit 1 1378 if (numcols == 0) exit 1379 } 1380 1381 { 1382 for (i = 1; i <= numcols; i++) { 1383 if (i > 1) printf "\t" 1384 printf "%s", $(pick[i]) 1385 } 1386 1387 printf "\n"; fflush() 1388 } 1389 ' "$@" 1390 } 1391 1392 # Red AWK styles lines satisfying an AWK condition/expression red, keeping all 1393 # other lines the same 1394 rawk() { 1395 local cond="${1:-1}" 1396 [ $# -gt 0 ] && shift 1397 1398 awk ' 1399 { low = lower = tolower($0) } 1400 1401 '"${cond}"' { 1402 gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;204;0;0m") 1403 printf "\x1b[38;2;204;0;0m%s\x1b[0m\n", $0; fflush() 1404 next 1405 } 1406 1407 { print; fflush() } 1408 ' "$@" 1409 } 1410 1411 # Realign all detected columns, right-aligning any detected numbers in any 1412 # column. 1413 realign() { 1414 awk ' 1415 function match_number(v) { 1416 return match(v, /^[+-]?[0-9]+(\.[0-9]+)?$/) 1417 } 1418 1419 function match_dot_digits(v) { 1420 return match(v, /\.[0-9]+$/) 1421 } 1422 1423 { gsub(/\r$/, "") } 1424 1425 FNR == 1 { 1426 FS = ($0 ~ /\t/) ? "\t" : " " 1427 $0 = $0 1428 } 1429 1430 { 1431 for (i = 1; i <= NF; i++) { 1432 data[NR][i] = $i 1433 1434 if (match_number($i)) { 1435 if (match_dot_digits($i)) { 1436 dd = RLENGTH 1437 if (dot_decs[i] < dd) dot_decs[i] = dd 1438 iw = RSTART - 1 1439 if (int_widths[i] < iw) int_widths[i] = iw 1440 } else { 1441 w = length($i) 1442 if (int_widths[i] < w) int_widths[i] = w 1443 } 1444 1445 continue 1446 } 1447 1448 w = length($i) 1449 if (widths[i] < w) widths[i] = w 1450 } 1451 } 1452 1453 END { 1454 # fix column-widths using the number-padding info 1455 for (i = 1; i <= NF; i++) { 1456 w = int_widths[i] + dot_decs[i] 1457 if (widths[i] < w) widths[i] = w 1458 } 1459 1460 for (i = 1; i <= NR; i++) { 1461 last = length(data[i]) 1462 1463 for (j = 1; j <= last; j++) { 1464 if (j > 1) printf " " # put 2-space gaps between columns 1465 1466 v = data[i][j] 1467 1468 if (!match_number(v)) { 1469 # avoid adding trailing spaces at the end of lines 1470 printf "%*s", (j == last) ? 
0 : -widths[j], v 1471 continue 1472 } 1473 1474 w = length(v) 1475 if (match_dot_digits(v)) { 1476 dd = RLENGTH 1477 iw = RSTART - 1 1478 } else { 1479 dd = 0 1480 iw = w 1481 } 1482 1483 dpad = dot_decs[j] - dd 1484 ipad = int_widths[j] - iw 1485 if (ipad < 0) ipad = 0 1486 lpad = widths[j] - (ipad + w + dpad) 1487 if (lpad < 0) lpad = 0 1488 1489 # avoid adding trailing spaces at the end of lines 1490 if (j == last) dpad = 0 1491 1492 printf "%*s%*s%s%*s", lpad, "", ipad, "", v, dpad, "" 1493 } 1494 1495 printf "\n" 1496 } 1497 } 1498 ' "$@" 1499 } 1500 1501 # reflow/trim lines of prose (text) to improve its legibility: it's especially 1502 # useful when the text is pasted from web-pages being viewed in reader mode 1503 reprose() { 1504 local w="${1:-80}" 1505 [ $# -gt 0 ] && shift 1506 awk ' 1507 FNR == 1 && NR > 1 { print "" } 1508 { gsub(/\r$/, ""); print; fflush() } 1509 ' "$@" | fold -s -w "$w" | sed -u -E 's- *\r?$--' 1510 } 1511 1512 # Realign Tab Separated-Values, by padding with spaces to match each column's 1513 # widest value, right-aligning all numbers. 1514 rtsv() { 1515 awk -F "\t" ' 1516 function match_number(v) { 1517 return match(v, /^[+-]?[0-9]+(\.[0-9]+)?$/) 1518 } 1519 1520 function match_dot_digits(v) { 1521 return match(v, /\.[0-9]+$/) 1522 } 1523 1524 { 1525 gsub(/\r$/, "") 1526 1527 for (i = 1; i <= NF; i++) { 1528 data[NR][i] = $i 1529 1530 if (match_number($i)) { 1531 if (match_dot_digits($i)) { 1532 dd = RLENGTH 1533 if (dot_decs[i] < dd) dot_decs[i] = dd 1534 iw = RSTART - 1 1535 if (int_widths[i] < iw) int_widths[i] = iw 1536 } else { 1537 w = length($i) 1538 if (int_widths[i] < w) int_widths[i] = w 1539 } 1540 1541 continue 1542 } 1543 1544 w = length($i) 1545 if (widths[i] < w) widths[i] = w 1546 } 1547 } 1548 1549 END { 1550 # fix column-widths using the number-padding info 1551 for (i = 1; i <= NF; i++) { 1552 w = int_widths[i] + dot_decs[i] 1553 if (widths[i] < w) widths[i] = w 1554 } 1555 1556 for (i = 1; i <= NR; i++) { 1557 last = length(data[i]) 1558 1559 for (j = 1; j <= last; j++) { 1560 if (j > 1) printf " " # put 2-space gaps between columns 1561 1562 v = data[i][j] 1563 1564 if (!match_number(v)) { 1565 # avoid adding trailing spaces at the end of lines 1566 printf "%*s", (j == last) ? 
0 : -widths[j], v 1567 continue 1568 } 1569 1570 w = length(v) 1571 if (match_dot_digits(v)) { 1572 dd = RLENGTH 1573 iw = RSTART - 1 1574 } else { 1575 dd = 0 1576 iw = w 1577 } 1578 1579 dpad = dot_decs[j] - dd 1580 ipad = int_widths[j] - iw 1581 if (ipad < 0) ipad = 0 1582 lpad = widths[j] - (ipad + w + dpad) 1583 if (lpad < 0) lpad = 0 1584 1585 # avoid adding trailing spaces at the end of lines 1586 if (j == last) dpad = 0 1587 1588 printf "%*s%*s%s%*s", lpad, "", ipad, "", v, dpad, "" 1589 } 1590 1591 printf "\n" 1592 } 1593 } 1594 ' "$@" 1595 } 1596 1597 # Show Files (and folders), coloring folders and links 1598 sf() { 1599 local arg 1600 local gap=0 1601 local options='-JMKiCRS' 1602 1603 if [ $# -le 1 ]; then 1604 options='--header=1 -JMKiCRS' 1605 fi 1606 1607 for arg in "${@:-.}"; do 1608 [ "${gap}" -gt 0 ] && printf "\n" 1609 printf "\e[7m%s\e[0m\n\n" "$(realpath "${arg}")" 1610 gap=1 1611 1612 ls -al --file-type --color=never --time-style iso "${arg}" | awk ' 1613 BEGIN { 1614 drep = "\x1b[38;2;0;135;255m\x1b[48;2;228;228;228m&\x1b[0m" 1615 lrep = "\x1b[38;2;0;135;95m\x1b[48;2;228;228;228m&\x1b[0m" 1616 } 1617 1618 NR < 4 { next } 1619 (NR - 3) % 5 == 1 && (NR - 3) > 1 { print "" } 1620 1621 { 1622 gsub(/^(d[rwx-]+)/, drep) 1623 gsub(/^(l[rwx-]+)/, lrep) 1624 printf "%6d %s\n", NR - 3, $0; fflush() 1625 } 1626 ' 1627 done | less "${options}" 1628 } 1629 1630 # Show File Sizes 1631 sfs() { 1632 # turn arg-list into single-item lines 1633 printf "%s\x00" "$@" | 1634 # calculate file-sizes, and reverse-sort results 1635 xargs -0 wc -c | sort -rn | 1636 # add/realign fields to improve legibility 1637 awk ' 1638 # start output with a header-like line, and add a MiB field 1639 BEGIN { printf "%6s %10s %8s name\n", "n", "bytes", "MiB"; fflush() } 1640 # make table breathe with empty lines, so tall outputs are readable 1641 (NR - 1) % 5 == 1 && NR > 1 { print "" } 1642 # emit regular output lines 1643 { 1644 printf "%6d %10d %8.2f ", NR - 1, $1, $1 / 1048576 1645 # first field is likely space-padded 1646 gsub(/^ */, "") 1647 # slice line after the first field, as filepaths can have spaces 1648 $0 = substr($0, length($1) + 1) 1649 # first field is likely space-padded 1650 gsub(/^ /, "") 1651 printf "%s\n", $0; fflush() 1652 } 1653 ' | 1654 # make zeros in the MiB field stand out with a special color 1655 awk ' 1656 { 1657 gsub(/ 00*\.00* /, "\x1b[38;2;135;135;175m&\x1b[0m") 1658 print; fflush() 1659 } 1660 ' | 1661 # make result interactively browsable 1662 less -JMKiCRS 1663 } 1664 1665 # SHell-QUOTE each line from the input(s): this is useful to make lines of 1666 # single-filepaths compatible with `xargs`, since standard shell settings 1667 # get in the way of filepaths with spaces and other special symbols in them 1668 shquote() { 1669 awk ' 1670 { 1671 s = $0 1672 gsub(/\r$/, "", s) 1673 gsub(/\\/, "\\\\", s) 1674 gsub(/"/, "\\\"", s) 1675 gsub(/`/, "\\`", s) 1676 gsub(/\$/, "\\$", s) 1677 printf "\"%s\"\n", s; fflush() 1678 } 1679 ' "$@" 1680 } 1681 1682 # emit the first line as is, sorting all lines after that, using the 1683 # `sort` command, passing all/any arguments/options to it 1684 sortrest() { 1685 awk -v sort="sort $*" ' 1686 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1687 { gsub(/\r$/, "") } 1688 NR == 1 { print; fflush() } 1689 NR > 1 { print | sort } 1690 ' 1691 } 1692 1693 # SORt Tab-Separated Values: emit the first line as is, sorting all lines after 1694 # that, using the `sort` command in TSV (tab-separated values) mode, passing 1695 # all/any arguments/options to it 
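#
# A minimal usage sketch (illustrative only; the data are made up): keep the
# header line first, then reverse-sort the remaining lines numerically on
# their 2nd column
#
#   printf 'name\tage\nada\t36\nbob\t25\n' | sortsv -rnk2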
1696 sortsv() { 1697 awk -v sort="sort -t \"$(printf '\t')\" $*" ' 1698 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1699 { gsub(/\r$/, "") } 1700 NR == 1 { print; fflush() } 1701 NR > 1 { print | sort } 1702 ' 1703 } 1704 1705 # ignore leading spaces, trailing spaces, even runs of multiple spaces 1706 # in the middle of lines, as well as trailing carriage returns 1707 squeeze() { 1708 awk ' 1709 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1710 { 1711 gsub(/^ +| *\r?$/, "") 1712 gsub(/ *\t */, "\t") 1713 gsub(/ +/, " ") 1714 print; fflush() 1715 } 1716 ' "$@" 1717 } 1718 1719 # SQUeeze and stOMP, by ignoring leading spaces, trailing spaces, even runs 1720 # of multiple spaces in the middle of lines, as well as trailing carriage 1721 # returns, while also turning runs of empty lines into single empty lines, 1722 # and ignoring leading/trailing empty lines, effectively also `squeezing` 1723 # lines vertically 1724 squomp() { 1725 awk ' 1726 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1727 /^\r?$/ { empty = 1; next } 1728 empty { if (n > 0) print ""; empty = 0 } 1729 { 1730 gsub(/^ +| *\r?$/, "") 1731 gsub(/ *\t */, "\t") 1732 gsub(/ +/, " ") 1733 print; fflush() 1734 n++ 1735 } 1736 ' "$@" 1737 } 1738 1739 # turn runs of empty lines into single empty lines, effectively squeezing 1740 # paragraphs vertically, so to speak; runs of empty lines both at the start 1741 # and at the end are ignored 1742 stomp() { 1743 awk ' 1744 /^\r?$/ { empty = 1; next } 1745 empty { if (n > 0) print ""; empty = 0 } 1746 { print; fflush(); n++ } 1747 ' "$@" 1748 } 1749 1750 # STRike-thru (lines) with AWK 1751 strawk() { 1752 local cond="${1:-1}" 1753 [ $# -gt 0 ] && shift 1754 awk ' 1755 { low = lower = tolower($0) } 1756 '"${cond}"' { 1757 gsub(/\x1b\[0m/, "\x1b[0m\x1b[9m") 1758 printf "\x1b[9m%s\x1b[0m\n", $0; fflush() 1759 next 1760 } 1761 { print; fflush() } 1762 ' "$@" 1763 } 1764 1765 # Sort Tab-Separated Values: emit the first line as is, sorting all lines after 1766 # that, using the `sort` command in TSV (tab-separated values) mode, passing 1767 # all/any arguments/options to it 1768 stsv() { 1769 awk -v sort="sort -t \"$(printf '\t')\" $*" ' 1770 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1771 { gsub(/\r$/, "") } 1772 NR == 1 { print; fflush() } 1773 NR > 1 { print | sort } 1774 ' 1775 } 1776 1777 # use the result of the `awk` function `substr` for each line 1778 substr() { 1779 local start="${1:-1}" 1780 local length="${2:-80}" 1781 [ $# -gt 0 ] && shift 1782 [ $# -gt 0 ] && shift 1783 awk -v start="${start}" -v len="${length}" \ 1784 '{ print substr($0, start, len); fflush() }' "$@" 1785 } 1786 1787 # append a final Tab-Separated-Values line with the sums of all columns from 1788 # the input table(s) given; items from first lines aren't counted/added 1789 sumtsv() { 1790 awk -F "\t" ' 1791 # FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1792 1793 { 1794 gsub(/\r$/, "") 1795 print; fflush() 1796 if (width < NF) width = NF 1797 } 1798 1799 FNR > 1 { for (i = 1; i <= NF; i++) sums[i] += $i + 0 } 1800 1801 END { 1802 for (i = 1; i <= width; i++) { 1803 if (i > 1) printf "\t" 1804 printf "%s", sums[i] "" 1805 } 1806 if (width > 0) printf "\n" 1807 } 1808 ' "$@" 1809 } 1810 1811 # show a reverse-sorted tally of all lines read, where ties are sorted 1812 # alphabetically 1813 tally() { 1814 awk -v sortcmd="sort -t \"$(printf '\t')\" -rnk2 -k1d" ' 1815 # reassure users by instantly showing the header 1816 BEGIN { print "value\ttally"; fflush() } 1817 { gsub(/\r$/, ""); t[$0]++ } 1818 END { for (k in t) { printf("%s\t%d\n", k, t[k]) | 
sortcmd } } 1819 ' "$@" 1820 } 1821 1822 # Tab AWK: TSV-specific I/O settings for `awk` 1823 # tawk() { awk -F "\t" -v OFS="\t" "$@"; } 1824 1825 # Tab AWK: TSV-specific I/O settings for `awk` 1826 tawk() { stdbuf -oL awk -F "\t" -v OFS="\t" "$@"; } 1827 1828 # Titled conCATenate Lines highlights each filename, before emitting its lines 1829 tcatl() { 1830 awk ' 1831 FNR == 1 { printf "\x1b[7m%s\x1b[0m\n", FILENAME; fflush() } 1832 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1833 { gsub(/\r$/, ""); print; fflush() } 1834 ' "$@" 1835 } 1836 1837 # simulate the cadence of old-fashioned teletype machines, by slowing down 1838 # the output of ASCII/UTF-8 symbols from the standard-input 1839 teletype() { 1840 awk ' 1841 { 1842 gsub(/\r$/, "") 1843 1844 n = length 1845 for (i = 1; i <= n; i++) { 1846 if (code = system("sleep 0.015")) exit code 1847 printf "%s", substr($0, i, 1); fflush() 1848 } 1849 if (code = system("sleep 0.75")) exit code 1850 printf "\n"; fflush() 1851 } 1852 1853 # END { if (NR > 0 && code != 0) printf "\n" } 1854 ' "$@" 1855 } 1856 1857 # lowercase all ASCII symbols 1858 tolower() { awk '{ print tolower($0); fflush() }' "$@"; } 1859 1860 # get the processes currently using the most cpu 1861 topcpu() { 1862 local n="${1:-10}" 1863 [ "$n" -gt 0 ] && ps aux | awk ' 1864 NR == 1 { print; fflush() } 1865 NR > 1 { print | "sort -rnk3" } 1866 ' | head -n "$(("$n" + 1))" 1867 } 1868 1869 # get the processes currently using the most memory 1870 topmemory() { 1871 local n="${1:-10}" 1872 [ "$n" -gt 0 ] && ps aux | awk ' 1873 NR == 1 { print; fflush() } 1874 NR > 1 { print | "sort -rnk6" } 1875 ' | head -n "$(("$n" + 1))" 1876 } 1877 1878 # transpose (switch) rows and columns from tables 1879 transpose() { 1880 awk ' 1881 { gsub(/\r$/, "") } 1882 1883 FNR == 1 { FS = ($0 ~ /\t/) ? 
"\t" : " "; $0 = $0 } 1884 1885 { 1886 for (i = 1; i <= NF; i++) lines[i][NR] = $i 1887 if (maxitems < NF) maxitems = NF 1888 } 1889 1890 END { 1891 for (j = 1; j <= maxitems; j++) { 1892 for (i = 1; i <= NR; i++) { 1893 if (i > 1) printf "\t" 1894 printf "%s", lines[j][i] 1895 } 1896 printf "\n" 1897 } 1898 } 1899 ' "$@" 1900 } 1901 1902 # Underline (lines) with AWK 1903 uawk() { 1904 local cond="${1:-1}" 1905 [ $# -gt 0 ] && shift 1906 awk ' 1907 { low = lower = tolower($0) } 1908 '"${cond}"' { 1909 gsub(/\x1b\[0m/, "\x1b[0m\x1b[4m") 1910 printf "\x1b[4m%s\x1b[0m\n", $0; fflush() 1911 next 1912 } 1913 { print; fflush() } 1914 ' "$@" 1915 } 1916 1917 # Underline Every few lines: make groups of 5 lines (by default) stand out by 1918 # underlining the last line of each 1919 alias ue=zebra 1920 1921 # deduplicate lines, keeping them in their original order 1922 unique() { awk '!c[$0]++ { print; fflush() }' "$@"; } 1923 1924 # convert United States Dollars into CAnadian Dollars, using the latest 1925 # official exchange rates from the bank of canada; during weekends, the 1926 # latest rate may be from a few days ago; the default amount of usd to 1927 # convert is 1, when not given 1928 usd2cad() { 1929 local site='https://www.bankofcanada.ca/valet/observations/group' 1930 local csv_rates="${site}/FX_RATES_DAILY/csv" 1931 local url 1932 url="${csv_rates}?start_date=$(date -d '3 days ago' +'%Y-%m-%d')" 1933 curl -s "${url}" | awk -F, -v amount="$(echo "${1:-1}" | sed 's-_--g')" ' 1934 /USD/ { for (i = 1; i <= NF; i++) if($i ~ /USD/) j = i } 1935 END { gsub(/"/, "", $j); if (j != 0) printf "%.2f\n", amount * $j }' 1936 } 1937 1938 # find all WEB/hyperLINKS (https:// and http://) in the input text 1939 weblinks() { 1940 awk ' 1941 BEGIN { e = "https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*" } 1942 { 1943 # match all links in the current line 1944 for (s = $0; match(s, e); s = substr(s, RSTART + RLENGTH)) { 1945 print substr(s, RSTART, RLENGTH); fflush() 1946 } 1947 } 1948 ' "$@" 1949 } 1950 1951 # run `xargs`, using whole lines as extra arguments 1952 xl() { 1953 awk -v ORS='\000' ' 1954 FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1955 { gsub(/\r$/, ""); print; fflush() } 1956 ' | xargs -0 "$@" 1957 } 1958 1959 # underline every few lines: make groups of 5 lines (by default) stand out by 1960 # underlining the last line of each 1961 zebra() { 1962 local n="${1:-5}" 1963 [ $# -gt 0 ] && shift 1964 awk -v n="$n" ' 1965 BEGIN { if (n == 0) n = -1 } 1966 NR % n == 0 && NR != 1 { 1967 gsub(/\x1b\[0m/, "\x1b[0m\x1b[4m") 1968 printf("\x1b[4m%s\x1b[0m\n", $0); fflush() 1969 next 1970 } 1971 { print; fflush() } 1972 ' "$@" 1973 }