File: awksome.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # AWKsome
  27 #
# This is a collection of arguably useful shell functions/shortcuts which
# use AWK. Some of these can also be implemented using tools such as `sed`
# and `tr`, but these tools don't support any number of trailing filenames,
# which makes their AWK-based versions more convenient to use.
  32 #
  33 # Some tools also use `curl`, since they need data from live outside sources;
  34 # some tools also use `sed`, for convenience.
  35 #
# A few of these explicitly use GNU awk (gawk), mainly due to its support of
# UTF-8: it's better for something to result in an overt error/failure (such
# as `gawk not installed`) than to risk running into subtle bugs (handling
# multi-byte UTF-8 runes as multiple separate text items).
  40 
  41 
  42 # dash doesn't support regex-matching syntax, forcing to use case statements
  43 case "$0" in
  44     -bash|-dash|-sh|bash|dash|sh)
  45         # script is being sourced with bash or dash, which is good
  46         :
  47     ;;
  48     *)
  49         case "$ZSH_EVAL_CONTEXT" in
  50             *:file)
  51                 # script is being sourced with zsh, which is good
  52                 :
  53             ;;
  54             *)
  55                 # script is being run normally, which is a waste of time
  56 printf "\e[48;2;255;255;135m\e[38;2;0;0;0mDon't run this script, source it instead: to do that,\e[0m\n"
  57 printf "\e[48;2;255;255;135m\e[38;2;0;0;0mrun 'source awksome' or '. awksome' (no quotes either way).\e[0m\n"
  58                 # failing during shell-startup may deny shell access, so exit
  59                 # with a 0 error-code to declare success
  60                 exit 0
  61             ;;
  62         esac
  63     ;;
  64 esac
  65 
  66 
  67 # emit each argument given as its own line of output
  68 args() { awk 'BEGIN { for (i = 1; i < ARGC; i++) print ARGV[i]; exit }' "$@"; }
  69 
  70 # avoid/ignore lines which match any of the regexes given
  71 avoid() {
  72     awk '
  73         BEGIN { for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } }
  74 
  75         {
  76             for (i = 1; i < ARGC; i++) if ($0 ~ e[i]) next
  77             print; fflush()
  78             got++
  79         }
  80 
  81         END { exit(got == 0) }
  82     ' "${@:-^\r?$}"
  83 }
  84 
  85 # BACKward-SORT (numerically) using values from the columns whose name match
  86 # the arguments given, either exactly, case-insensitively, as a 1-based index,
  87 # or even as a negative/backward indices.
  88 #
  89 # Sorting happens by comparing fields in the order given.
  90 #
  91 # The output is always lines of TSV (tab-separated values) items, even when
  92 # the lines from stdin aren't.
  93 backsort() {
  94     awk '
  95         function findcol(name, lowname, i) {
  96             for (i = 1; i <= NF; i++) {
  97                 if (name == $i) return i
  98             }
  99 
 100             for (i = 1; i <= NF; i++) {
 101                 if (lowname == tolower($i)) return i
 102             }
 103 
 104             if (1 <= name && name <= NF) return name + 0
 105             if (name < 0 && -name <= NF) return NF + name + 1
 106 
 107             return 0
 108         }
 109 
 110         BEGIN {
 111             for (i = 1; i < ARGC; i++) {
 112                 colnames[i] = ARGV[i]
 113                 lownames[i] = tolower(colnames[i])
 114                 delete ARGV[i]
 115             }
 116         }
 117 
 118         { gsub(/\r$/, "") }
 119 
 120         NR == 1 {
 121             if ($0 ~ /\t/) {
 122                 FS = "\t"
 123                 $0 = $0
 124             }
 125 
 126             width = NF
 127             given = length(colnames)
 128 
 129             for (i = 1; i <= given; i++) {
 130                 j = findcol(colnames[i], lownames[i])
 131                 if (j > 0) pos[++numcols] = j
 132 
 133                 if (j == 0) {
 134                     fmt = "\x1b[31mno column match for \"%s\"\x1b[0m\n"
 135                     printf(fmt, colnames[i]) > "/dev/stderr"
 136                     errors++
 137                 }
 138             }
 139 
 140             if (errors > 0) exit 1
 141 
 142             cmd = "sort -t '\t'"
 143             for (i = 1; i <= numcols; i++) {
 144                 cmd = cmd sprintf(" -rnk%d", pos[i])
 145             }
 146 
 147             for (i = 1; i <= width; i++) {
 148                 if (i > 1) printf "\t"
 149                 printf("%s", $i)
 150             }
 151             printf "\n"; fflush()
 152 
 153             next
 154         }
 155 
 156         {
 157             for (i = 1; i <= width; i++) {
 158                 if (i > 1) printf "\t" | cmd
 159                 printf("%s", $i) | cmd
 160             }
 161             printf "\n" | cmd
 162         }
 163     ' "$@"
 164 }
 165 
 166 # process Blocks/paragraphs of non-empty lines with AWK
 167 # bawk() { awk -F='' -v RS='' "$@"; }
 168 
 169 # process Blocks/paragraphs of non-empty lines with AWK
 170 bawk() { stdbuf -oL awk -F='' -v RS='' "$@"; }
 171 
 172 # start by joining all arguments given as a tab-separated-items line of output,
 173 # followed by all lines from stdin verbatim
 174 begintsv() {
 175     awk '
 176         BEGIN {
 177             for (i = 1; i < ARGC; i++) {
 178                 if (i > 1) printf "\t"
 179                 printf "%s", ARGV[i]
 180                 delete ARGV[i]
 181             }
 182             if (ARGC > 1) printf "\n"
 183             fflush()
 184         }
 185         { print; fflush() }
 186     ' "$@"
 187 }
 188 
 189 # Breathe Header: add an empty line after the first one (the header), then
 190 # separate groups of 5 lines (by default) with empty lines between them
 191 bh() {
 192     local n="${1:-5}"
 193     [ $# -gt 0 ] && shift
 194     awk -v n="$n" '
 195         BEGIN { if (n == 0) n = -1 }
 196         (NR - 1) % n == 1 && NR > 1 { print "" }
 197         { print; fflush() }
 198     ' "$@"
 199 }
 200 
 201 # Breathe Header 5: add an empty line after the first one (the header),
 202 # then separate groups of 5 lines with empty lines between them
 203 bh5() {
 204     awk '
 205         (NR - 1) % 5 == 1 && NR > 1 { print "" }
 206         { print; fflush() }
 207     ' "$@"
 208 }
 209 
 210 # Breathe Lines: separate groups of 5 lines (by default) with empty lines
 211 bl() {
 212     local n="${1:-5}"
 213     [ $# -gt 0 ] && shift
 214     awk -v n="$n" '
 215         BEGIN { if (n == 0) n = -1 }
 216         NR % n == 1 && NR != 1 { print "" }
 217         { print; fflush() }
 218     ' "$@"
 219 }
 220 
 221 # Breathe Lines 5: separate groups of 5 lines with empty lines
 222 bl5() {
 223     awk '
 224         NR % 5 == 1 && NR != 1 { print "" }
 225         { print; fflush() }
 226     ' "$@"
 227 }
 228 
 229 # process BLocks/paragraphs of non-empty lines with AWK
 230 # blawk() { awk -F='' -v RS='' "$@"; }
 231 
 232 # process BLocks/paragraphs of non-empty lines with AWK
 233 blawk() { stdbuf -oL awk -F='' -v RS='' "$@"; }
 234 
 235 # Begin-Only Awk
 236 boa() { awk "BEGIN { $1; exit }"; }
 237 
 238 # Begin Print Exit
 239 bpe() { awk "BEGIN { print $1; exit }"; }
 240 
 241 # split lines using the regex given, turning them into single-item lines
 242 breakdown() {
 243     local sep="${1:- }"
 244     [ $# -gt 0 ] && shift
 245     awk -v FPAT="${sep}" '{ for (i = 1; i <= NF; i++) print $i; fflush() }' "$@"
 246 }
 247 
 248 # separate groups of 5 lines (by default) with empty lines
 249 breathe() {
 250     local n="${1:-5}"
 251     [ $# -gt 0 ] && shift
 252     awk -v n="$n" '
 253         BEGIN { if (n == 0) n = -1 }
 254         NR % n == 1 && NR != 1 { print "" }
 255         { print; fflush() }
 256     ' "$@"
 257 }
 258 
 259 # Book-like Side-By-Side lays out text lines into several columns, separating
 260 # them with a special symbol. This script lets you see more data at once, as
 261 # monitors are wider than tall and most text content has fairly short lines.
 262 #
 263 # If a column-count isn't given, it's 2 by default, just like with books. If
 264 # no named inputs are given, lines are read from the standard input.
 265 bsbs() {
 266     local num_columns
 267     local height
 268     local failed
 269     local arg
 270 
 271     if [ $# -eq 0 ] && [ ! -p /dev/stdin ]; then
 272         awk '/^# +bsbs /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
 273         printf "\e[32mno files given and stdin not being piped into\e[0m\n"
 274         return 0
 275     fi
 276 
 277     num_columns=2
 278     if [ "$(echo "$1" | grep -E '^[+-]?[0-9]+$' 2> /dev/null)" ]; then
 279         num_columns="$1"
 280         shift
 281     fi
 282 
 283     if [ "${num_columns}" -lt 1 ]; then
 284         num_columns=1
 285     fi
 286 
 287     # use the current screen height
 288     height="$(tput lines)"
 289 
 290     if [ "${height}" -lt 2 ]; then
 291         printf "\e[31mscreen/window is too short to show content\e[0m\n" >&2
 292         return 1
 293     fi
 294 
 295     # show all non-existing files given
 296     failed=0
 297     for arg in "$@"; do
 298         if [ "${arg}" = "-" ]; then
 299             continue
 300         fi
 301         if [ ! -e "${arg}" ]; then
 302             printf "\e[31mno file named \"%s\"\e[0m\n" "${arg}" > /dev/stderr
 303             failed=1
 304         fi
 305     done
 306 
 307     # in case of errors, avoid showing an empty screen
 308     if [ "${failed}" -gt 0 ]; then
 309         return 1
 310     fi
 311 
 312     # allow loading lines from multiple files, ensuring no lines are accidentally
 313     # joined across inputs
 314     awk 1 "$@" |
 315 
 316     # ignore leading UTF-8 BOMs (byte-order marks) and trailing carriage-returns:
 317     # the latter in particular will ruin side-by-side output
 318     sed 's-^\xef\xbb\xbf--; s-\r$--' |
 319 
 320     # before laying out lines side-by-side, expand all tabs
 321     expand -t 4 |
 322 
 323     # lay things side-by-side, like pages/faces in a book
 324     awk -v num_cols="${num_columns}" -v height="${height}" '
 325         BEGIN {
 326             inner_rows = height - 2
 327         }
 328 
 329         # remember all lines; assumes carriage-returns are already removed
 330         {
 331             p = NR - 1
 332             lines[p] = $0
 333             gsub(/\x1b\[[0-9;]*[A-Za-z]/, "")
 334             widths[p] = length($0)
 335         }
 336 
 337         # round up non-integers
 338         function ceil(n) {
 339             return (n % 1) ? n - (n % 1) + 1 : n
 340         }
 341 
 342         END {
 343             # if a single column is enough for all lines, just do it that way
 344             if (NR <= inner_rows) {
 345                 for (i = 0; i < NR; i++) print lines[i]
 346                 exit
 347             }
 348 
 349             # avoid empty trailing columns
 350             if (NR < inner_rows * num_cols) num_cols = ceil(NR / inner_rows)
 351             # ensure number of columns is valid
 352             if (num_cols < 1) num_cols = 1
 353 
 354             for (i = 0; i < NR; i += inner_rows * num_cols) {
 355                 for (j = 0; j < inner_rows; j++) {
 356                     for (k = 0; k < num_cols; k++) {
 357                         w = widths[i + k * inner_rows + j]
 358                         if (max_widths[k] < w) max_widths[k] = w
 359                     }
 360                 }
 361             }
 362 
 363             widest = 0
 364             for (i in max_widths) {
 365                 if (widest < max_widths[i]) widest = max_widths[i]
 366             }
 367 
 368             total_max_width = 0
 369             for (i = 0; i < num_cols; i++) {
 370                 total_max_width += max_widths[i]
 371             }
 372             # also count separators, which are 3-items wide
 373             if (num_cols > 0) total_max_width += 3 * (num_cols - 1)
 374 
 375             # make separator wide enough to match the length of any output line
 376             bottom_sep = "································"
 377             for (nsep = 32; nsep < total_max_width; nsep *= 2) {
 378                 bottom_sep = bottom_sep bottom_sep
 379             }
 380             # separator is used directly, so match the needed width exactly
 381             bottom_sep = substr(bottom_sep, 1, total_max_width)
 382 
 383             # emit lines side by side
 384             for (i = 0; i < NR; i += inner_rows * num_cols) {
 385                 # emit a page-bottom/separator line between pages of columns
 386                 if (i > 0) print bottom_sep
 387 
 388                 for (j = 0; j < inner_rows; j++) {
 389                     # bottom-pad last page-pair with empty lines, so page-scroll
 390                     # on viewers like `less` stays in sync with the page bottoms
 391                     if (NR - i - j <= 0) {
 392                         print ""
 393                         continue
 394                     }
 395 
 396                     for (k = 0; k < num_cols; k++) {
 397                         p = i + k * inner_rows + j
 398                         l = lines[p]
 399 
 400                         if (k > 0) {
 401                             printf "\x1b[0m █"
 402                             if (k != num_cols - 1 || l != "") printf " "
 403                         }
 404 
 405                         printf "%s", l
 406 
 407                         if (k < num_cols - 1) {
 408                             pad = max_widths[k] - widths[p]
 409                             if (pad > 0) printf "%*s", pad, ""
 410                         }
 411                     }
 412 
 413                     print "\x1b[0m"
 414                 }
 415             }
 416 
 417             # end last page with an empty line, instead of the usual page-sep
 418             if (NR > 0) print ""
 419         }
 420     ' |
 421 
 422     # view the result interactively
 423     less -JMKiCRS
 424 }
 425 
 426 # show a reverse-sorted tally of all lines read, where ties are sorted
 427 # alphabetically, and where trailing bullets are added to quickly make
 428 # the tally counts comparable at a glance
 429 bully() {
 430     awk -v sortcmd="sort -t \"$(printf '\t')\" -rnk2 -k1d" '
 431         # reassure users by instantly showing the header
 432         BEGIN { print "value\ttally\tbullets"; fflush() }
 433 
 434         { gsub(/\r$/, ""); tally[$0]++ }
 435 
 436         END {
 437             bullet = "•"
 438 
 439             # find the max tally, which is needed to build the bullets-string
 440             max = 0
 441             for (k in tally) if (max < tally[k]) max = tally[k]
 442 
 443             # make enough bullets for all tallies: this loop makes growing the
 444             # string a task with complexity O(n * log n), instead of a naive
 445             # O(n**2), which can slow-down things when tallies are high enough
 446             bullets = bullet
 447             for (n = max; n > 1; n /= 2) bullets = bullets bullets
 448 
 449             # emit unsorted output lines to the sort cmd, which will emit the
 450             # final reverse-sorted tally lines
 451             for (k in tally) {
 452                 t = tally[k]
 453                 s = (t == 1) ? bullet : substr(bullets, 1, t)
 454                 printf "%s\t%d\t%s\n", k, t, s | sortcmd
 455             }
 456         }
 457     ' "$@"
 458 }
 459 
 460 # uppercase the first letter on each line, and lowercase all later letters
 461 capitalize() {
 462     awk '{ print; fflush() }' "$@" | sed -E -u 's-^(.*)-\L\1-; s-^(.)-\u\1-'
 463 }
 464 
 465 # conCATenate Lines guarantees no lines are ever accidentally joined
 466 # across inputs, always emitting a line-feed at the end of every line
 467 catl() { awk '{ print; fflush() }' "$@"; }
 468 
 469 # Count with AWK: count the times the AWK expression/condition given is true
 470 cawk() {
 471     local cond="${1:-1}"
 472     [ $# -gt 0 ] && shift
 473     awk "
 474         { low = lower = tolower(\$0) }
 475         ${cond} { count++ }
 476         END { print count }
 477     " "$@"
 478 }
 479 
 480 # center-align lines of text, using the current screen width
 481 center() {
 482     gawk -v width="$(tput cols)" '
 483         {
 484             gsub(/\r$/, "")
 485             lines[NR] = $0
 486             s = $0
 487             gsub(/\x1b\[[0-9;]*[A-Za-z]/, "", s) # ANSI style-changers
 488             l = length(s)
 489             if (maxlen < l) maxlen = l
 490         }
 491 
 492         END {
 493             n = (width - maxlen) / 2
 494             if (n % 1) n = n - (n % 1)
 495             fmt = sprintf("%%%ds%%s\n", (n > 0) ? n : 0)
 496             for (i = 1; i <= NR; i++) printf fmt, "", lines[i]
 497         }
 498     ' "$@"
 499 }
 500 
 501 # ignore final life-feed from text, if it's the very last byte; also ignore
 502 # all trailing carriage-returns
 503 choplf() {
 504     awk '
 505         FNR == 1 { gsub(/^\xef\xbb\xbf/, "") }
 506         NR > 1 { print ""; fflush() }
 507         { gsub(/\r$/, ""); printf "%s", $0; fflush() }
 508     ' "$@"
 509 }
 510 
 511 # COunt COndition: count how many times the AWK expression given is true
 512 coco() {
 513     local cond="${1:-1}"
 514     [ $# -gt 0 ] && shift
 515     awk "
 516         { low = lower = tolower(\$0) }
 517         ${cond} { count++ }
 518         END { print count }
 519     " "$@"
 520 }
 521 
 522 # split lines using the string given, turning them into single-item lines
 523 crumble() {
 524     local sep="${1:- }"
 525     [ $# -gt 0 ] && shift
 526     awk -F "${sep}" '{ for (i = 1; i <= NF; i++) print $i; fflush() }' "$@"
 527 }
 528 
 529 # DECAPitate (lines) emits the first line as is, piping all lines after that
 530 # to the command given, passing all/any arguments/options to it
 531 # decap() {
 532 #     awk -v cmd="$*" 'NR == 1 { print; fflush() } NR > 1 { print | cmd }'
 533 # }
 534 
 535 # ignore whole-comment lines, or just trailing unix-style comments in them
 536 decomment() {
 537     awk '/^ *#/ { next } { gsub(/ *#.*$/, ""); print; fflush(); }' "$@"
 538 }
 539 
 540 # DEDUPlicate prevents lines from appearing more than once
 541 dedup() { awk '!c[$0]++ { print; fflush() }' "$@"; }
 542 
 543 # dictionary-define the word given, using an online service
 544 define() {
 545     local arg
 546     local gap=0
 547     local options='-JMKiCRS'
 548 
 549     if [ $# -eq 0 ]; then
 550         printf "\e[38;2;204;0;0mdefine: no names given\e[0m\n" >&2
 551         return 1
 552     fi
 553 
 554     if [ $# -eq 1 ]; then
 555         options='--header=1 -JMKiCRS'
 556     fi
 557 
 558     for arg in "$@"; do
 559         [ "${gap}" -gt 0 ] && printf "\n"
 560         gap=1
 561         printf "\e[7m%-80s\x1b[0m\n" "${arg}"
 562         curl -s "dict://dict.org/d:${arg}" | awk '
 563             { gsub(/\r$/, "") }
 564             /^151 / {
 565                 printf "\x1b[38;2;52;101;164m%s\x1b[0m\n", $0; fflush()
 566                 next
 567             }
 568             /^[1-9][0-9]{2} / {
 569                 printf "\x1b[38;2;128;128;128m%s\x1b[0m\n", $0; fflush()
 570                 next
 571             }
 572             { print; fflush() }
 573         '
 574     done | less "${options}"
 575 }
 576 
 577 # convert lines of Space(s)-Separated Values into lines of tab-separated values
 578 dessv() {
 579     awk '
 580         FNR == 1 { gsub(/^\xef\xbb\xbf/, "") }
 581 
 582         {
 583             gsub(/\r$/, "")
 584             for (i = 1; i <= NF; i++) {
 585                 if (i > 1) printf "\t"
 586                 printf "%s", $i
 587             }
 588             printf "\n"; fflush()
 589         }
 590     ' "$@"
 591 }
 592 
 593 # ignore trailing spaces, as well as trailing carriage returns
 594 detrail() { awk '{ gsub(/ *\r?$/, ""); print; fflush() }' "$@"; }
 595 
 596 # DIVide 2 numbers 3 ways, including the complement
 597 div() {
 598     awk -v a="${1:-1}" -v b="${2:-1}" '
 599         BEGIN {
 600             gsub(/_/, "", a)
 601             gsub(/_/, "", b)
 602             if (a > b) { c = a; a = b; b = c }
 603             c = 1 - a / b
 604             if (0 <= c && c <= 1) printf "%f\n%f\n%f\n", a / b, b / a, c
 605             else printf "%f\n%f\n", a / b, b / a
 606             exit
 607         }'
 608 }
 609 
 610 # ignore/remove all matched regexes given on all stdin lines
 611 drop() {
 612     awk '
 613         BEGIN { for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] } }
 614         {
 615             for (i = 1; i < ARGC; i++) gsub(e[i], "")
 616             print; fflush()
 617         }
 618     ' "${@:-\r$}"
 619 }
 620 
 621 # Drop Tab Separated-Values, ignoring all columns given, and keeping all the
 622 # others. The column names given are matched using the first line from stdin.
 623 # Matching works by first trying exact matching, then by case-insensitive
 624 # matching, when there's no exact match, finally giving up when there's no
 625 # match either way.
 626 dtsv() {
 627     awk -F "\t" '
 628         BEGIN {
 629             for (i = 1; i < ARGC; i++) {
 630                 colnames[++n] = ARGV[i]
 631                 lownames[n] = tolower(ARGV[i])
 632                 delete ARGV[i]
 633             }
 634         }
 635 
 636         function findcol(name, lowname, i) {
 637             for (i = 1; i <= NF; i++) if (name == $i) return i
 638             for (i = 1; i <= NF; i++) if (lowname == tolower($i)) return i
 639 
 640             if (1 <= name && name <= NF) return name + 0
 641             if (name < 0 && -name <= NF) return NF + name + 1
 642 
 643             return 0
 644         }
 645 
 646         { gsub(/\r$/, "") }
 647 
 648         NR == 1 {
 649             numcols = NF
 650             for (i = 1; i <= numcols; i++) keep[i] = 1
 651 
 652             colsused = numcols
 653             for (i = 1; i <= numcols; i++) {
 654                 j = findcol(colnames[i], lownames[i])
 655                 if (j > 0) {
 656                     keep[j] = 0
 657                     colsused--
 658                 }
 659 
 660                 if (j == 0) {
 661                     fmt = "\x1b[31mno column match for \"%s\"\x1b[0m\n"
 662                     printf(fmt, colnames[i]) > "/dev/stderr"
 663                     errors++
 664                 }
 665             }
 666 
 667             if (errors > 0) exit 1
 668             if (colsused == 0) exit
 669         }
 670 
 671         {
 672             c = 0
 673             for (i = 1; i <= numcols; i++) {
 674                 if (keep[i] == 1) {
 675                     if (c > 0) printf "\t"
 676                     printf "%s", $i
 677                     c++
 678                 }
 679             }
 680 
 681             printf "\n"; fflush()
 682         }
 683     ' "$@"
 684 }
 685 
 686 # European Central Bank Latest Exchange Rates, as tab-separated values
 687 ecbler() {
 688     # load data from the european central bank website
 689     wget -q -O - 'https://www.ecb.europa.eu/stats/eurofxref/eurofxref.zip' |
 690     # decompress payload
 691     gzip -d |
 692     # turn CSV into TSV, ignoring trailing empty fields
 693     sed -E 's- *\r$--; s-,$--; s-, *-\t-g; s-\t+$--' |
 694     # reciprocate dividers into multipliers
 695     awk -F "\t" '
 696         NR == 1 { print }
 697         NR == 2 {
 698             printf "%s", $1
 699             for (i = 2; i <= NF; i++) printf("\t%.6f", 1.0 / $i)
 700             printf "\n"
 701         }
 702     ' |
 703     # pick a subset of the columns
 704     awk -F "\t" -v OFS="\t" \
 705         '{ print $1, $16, $17, $18, $19, $23, $25, $7, $2 }' |
 706     # turn dates into the yyyy/mm/dd format
 707     sed -E \
 708         -e 's-([0-9]+) ([A-Za-z]{3})[a-z]+ ([0-9]+)-\3/\L\2/\1-g' \
 709         -e 's-jan-01-g' -e 's-feb-02-g' -e 's-mar-03-g' \
 710         -e 's-apr-04-g' -e 's-may-05-g' -e 's-jun-06-g' \
 711         -e 's-jul-07-g' -e 's-aug-08-g' -e 's-sep-09-g' \
 712         -e 's-oct-10-g' -e 's-nov-11-g' -e 's-dec-12-g'
 713 }
 714 
 715 # fix lines, ignoring leading UTF-8_BOMs (byte-order-marks) on each input's
 716 # first line, turning all end-of-line CRLF byte-pairs into single line-feeds,
 717 # and ensuring each input's last line ends with a line-feed; trailing spaces
 718 # are also ignored
 719 fixlines() {
 720     awk '
 721         FNR == 1 { gsub(/^\xef\xbb\xbf/, "") }
 722         { gsub(/ *\r?$/, ""); print; fflush() }
 723     ' "$@"
 724 }
 725 
 726 # convert FeeT into meters
 727 ft() {
 728     echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' |
 729         awk '/./ { printf "%.2f\n", 0.3048 * $0; fflush() }'
 730 }
 731 
 732 # convert FeeT² (squared) into meters²
 733 ft2() {
 734     echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' |
 735         awk '/./ { printf "%.2f\n", 0.09290304 * $0 }'
 736 }
 737 
 738 # convert a mix of FeeT and INches into meters
 739 ftin() {
 740     local ft="${1:-0}"
 741     ft="$(echo "${ft}" | sed 's-_--g')"
 742     local in="${2:-0}"
 743     in="$(echo "${in}" | sed 's-_--g')"
 744     awk "BEGIN { print 0.3048 * ${ft} + 0.0254 * ${in}; exit }"
 745 }
 746 
 747 # convert GALlons into liters
 748 gal() {
 749     echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' |
 750         awk '/./ { printf "%.2f\n", 3.785411784 * $0; fflush() }'
 751 }
 752 
 753 # convert binary GigaBytes into bytes
 754 gb() {
 755     echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' |
 756         awk '/./ { printf "%.4f\n", 1073741824 * $0; fflush() }' |
 757         sed 's-\.00*$--'
 758 }
 759 
 760 # Good, Bad, Meh colors lines using up to 3 regular expressions, keeping all
 761 # other input lines verbatim
 762 gbm() {
 763     local good="$1"
 764     local bad="$2"
 765     local meh="$3"
 766     [ $# -gt 0 ] && shift
 767     [ $# -gt 0 ] && shift
 768     [ $# -gt 0 ] && shift
 769 
 770     awk '
 771         BEGIN {
 772             gotgood = ARGC > 1 && ARGV[1] != ""
 773             gotbad = ARGC > 2 && ARGV[2] != ""
 774             gotmeh = ARGC > 3 && ARGV[3] != ""
 775             good = ARGV[1]
 776             bad = ARGV[2]
 777             meh = ARGV[3]
 778             delete ARGV[1]
 779             delete ARGV[2]
 780             delete ARGV[3]
 781         }
 782 
 783         gotgood && $0 ~ good {
 784             # code to use a color-blind-friendlier blue, instead of green
 785             # gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;0;95;215m")
 786             # printf "\x1b[38;2;0;95;215m%s\x1b[0m\n", $0
 787             gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;0;135;95m")
 788             printf "\x1b[38;2;0;135;95m%s\x1b[0m\n", $0; fflush()
 789             next
 790         }
 791 
 792         gotbad && $0 ~ bad {
 793             gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;204;0;0m")
 794             printf "\x1b[38;2;204;0;0m%s\x1b[0m\n", $0; fflush()
 795             next
 796         }
 797 
 798         gotmeh && $0 ~ meh {
 799             gsub(/\x1b\[0m/, "\x1b[0m\x1b[38;2;168;168;168m")
 800             printf "\x1b[38;2;168;168;168m%s\x1b[0m\n", $0; fflush()
 801             next
 802         }
 803 
 804         { print; fflush() }
 805     ' "${good}" "${bad}" "${meh}" "$@"
 806 }
 807 
 808 # glue/stick together various lines, only emitting a line-feed at the end; an
 809 # optional argument is the output-item-separator, which is empty by default
 810 glue() {
 811     local sep="${1:-}"
 812     [ $# -gt 0 ] && shift
 813     awk -v sep="${sep}" '
 814         NR > 1 { printf "%s", sep }
 815         { gsub(/\r/, ""); printf "%s", $0; fflush() }
 816         END { if (NR > 0) print ""; fflush() }
 817     ' "$@"
 818 }
 819 
 820 # GRoup via AWK groups lines using common results of the AWK expression given
 821 grawk() {
 822     local code="${1:-\$0}"
 823     [ $# -gt 0 ] && shift
 824 
 825     awk '
 826         { low = lower = tolower($0) }
 827 
 828         {
 829             k = '"${code}"'
 830             if (!(k in groups)) ordkeys[++okl] = k
 831             groups[k][length(groups[k]) + 1] = $0
 832         }
 833 
 834         END {
 835             for (i = 1; i <= okl; i++) {
 836                 k = ordkeys[i]
 837                 n = length(groups[k])
 838                 for (j = 1; j <= n; j++) print groups[k][j]
 839             }
 840         }
 841     ' "$@"
 842 }
 843 
 844 # Style lines using a GRAY-colored BACKground
 845 grayback() {
 846     awk '
 847         {
 848             gsub(/\x1b\[0m/, "\x1b[0m\x1b[48;2;218;218;218m")
 849             printf "\x1b[48;2;218;218;218m%s\x1b[0m\n", $0; fflush()
 850         }
 851     ' "$@"
 852 }
 853 
 854 # Global extended regex SUBstitute, using the AWK function of the same name:
 855 # arguments are used as regex/replacement pairs, in that order
 856 gsub() {
 857     awk '
 858         BEGIN {
 859             for (i = 1; i < ARGC; i++) {
 860                 args[++n] = ARGV[i]
 861                 delete ARGV[i]
 862             }
 863         }
 864         {
 865             for (i = 1; i <= n; i += 2) gsub(args[i], args[i + 1])
 866             print; fflush()
 867         }
 868     ' "$@"
 869 }
 870 
 871 # Highlight (lines) with AWK
 872 hawk() {
 873     local cond="${1:-1}"
 874     [ $# -gt 0 ] && shift
 875     awk '
 876         { low = lower = tolower($0) }
 877         '"${cond}"' {
 878             gsub(/\x1b\[0m/, "\x1b[0m\x1b[7m")
 879             printf "\x1b[7m%s\x1b[0m\n", $0; fflush()
 880             next
 881         }
 882         { print; fflush() }
 883     ' "$@"
 884 }
 885 
 886 # show each byte as a pair of HEXadecimal (base-16) symbols
 887 hexify() {
 888     cat "$@" | od -x -A n |
 889         awk '{ gsub(/ +/, ""); printf "%s", $0; fflush() } END { printf "\n" }'
 890 }
 891 
 892 # highlight lines
 893 highlight() {
 894     awk '
 895         {
 896             gsub(/\x1b\[0m/, "\x1b[0m\x1b[7m")
 897             printf "\x1b[7m%s\x1b[0m\n", $0; fflush()
 898         }
 899     ' "$@"
 900 }
 901 
 902 # HIghlight LEAK emits/tees input both to stdout and stderr, highlighting what
 903 # it emits to stderr using an ANSI-style; this cmd is useful to `debug` pipes
 904 # involving several steps
 905 hileak() {
 906     awk '
 907         {
 908             gsub(/\x1b\[[0-9;]*[A-Za-z]/, "")
 909             printf "\x1b[7m%s\x1b[0m\n", $0 > "/dev/stderr"
 910             print; fflush()
 911         }
 912     ' "$@"
 913 }
 914 
 915 # convert seconds into a colon-separated Hours-Minutes-Seconds triple
 916 hms() {
 917     echo "${@:-0}" | sed -E 's-_--g; s- +-\n-g' | awk '/./ {
 918         x = $0
 919         h = (x - x % 3600) / 3600
 920         m = (x % 3600) / 60
 921         s = x % 60
 922         printf "%02d:%02d:%05.2f\n", h, m, s; fflush()
 923     }'
 924 }
 925 
 926 # find all hyperlinks inside HREF attributes in the input text
 927 href() {
 928     awk '
 929         BEGIN { e = "href=\"[^\"]+\"" }
 930         {
 931             for (s = $0; match(s, e); s = substr(s, RSTART + RLENGTH)) {
 932                 print substr(s, RSTART + 6, RLENGTH - 7); fflush()
 933             }
 934         }
 935     ' "$@"
 936 }
 937 
 938 # Index all lines starting from 0, using a tab right after each line number
 939 i() { awk '{ printf "%d\t%s\n", NR - 1, $0; fflush() }' "$@"; }
 940 
 941 # avoid/ignore lines which case-insensitively match any of the regexes given
 942 iavoid() {
 943     gawk '
 944         BEGIN {
 945             IGNORECASE = 1
 946             for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] }
 947         }
 948 
 949         {
 950             for (i = 1; i < ARGC; i++) if ($0 ~ e[i]) next
 951             print; fflush(); got++
 952         }
 953 
 954         END { exit(got == 0) }
 955     ' "${@:-^\r?$}"
 956 }
 957 
 958 # case-Insensitively DEDUPlicate prevents lines from appearing more than once
 959 idedup() { awk '!c[tolower($0)]++ { print; fflush() }' "$@"; }
 960 
 961 # ignore/remove all case-insensitively matched regexes given on all stdin lines
 962 idrop() {
 963     gawk '
 964         BEGIN {
 965             IGNORECASE = 1
 966             for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] }
 967         }
 968 
 969         {
 970             for (i = 1; i < ARGC; i++) gsub(e[i], "")
 971             print; fflush()
 972         }
 973     ' "${@:-\r$}"
 974 }
 975 
 976 # only keep lines which case-insensitively match any of the regexes given
 977 imatch() {
 978     gawk '
 979         BEGIN {
 980             IGNORECASE = 1
 981             for (i = 1; i < ARGC; i++) { e[i] = ARGV[i]; delete ARGV[i] }
 982         }
 983 
 984         {
 985             for (i = 1; i < ARGC; i++) {
 986                 if ($0 ~ e[i]) {
 987                     print; fflush()
 988                     got++
 989                     next
 990                 }
 991             }
 992         }
 993 
 994         END { exit(got == 0) }
 995     ' "${@:-[^\r]}"
 996 }
 997 
 998 # start each non-empty line with extra n spaces
 999 indent() {
1000     awk '
1001         BEGIN {
1002             n = ARGV[1] + 0
1003             delete ARGV[1]
1004             fmt = sprintf("%%%ds%%s\n", (n > 0) ? n : 0)
1005         }
1006 
1007         /^\r?$/ { print ""; fflush(); next }
1008         { gsub(/\r$/, ""); printf(fmt, "", $0); fflush() }
1009     ' "$@"
1010 }
1011 
# emit each word-like item from each input line on its own line; when a file
# has tabs on its first line, its items are split using tabs alone, which
# allows items to have spaces in them
items() {
    awk '
        FNR == 1 {
            if ($0 ~ /\t/) FS = "\t"
            else FS = " "
            # split the current line again, using the separator just picked
            $0 = $0
        }

        {
            sub(/\r$/, "")
            for (k = 1; k <= NF; k++) print $k
            fflush()
        }
    ' "$@"
}
1021 
# case-insensitively deduplicate lines, keeping them in their original order:
# lines are compared after lower-casing them, but each first match is output
# exactly as is
iunique() {
    awk '
        { key = tolower($0) }
        !(key in seen) { seen[key] = 1; print; fflush() }
    ' "$@"
}
1026 
# Judge with AWK colors lines green/red/gray, using up to 3 AWK conditions,
# given in that order: the first condition a line satisfies picks its color,
# while lines satisfying none are emitted as they are; conditions can use
# variables `low` and `lower`, which hold the lower-cased current line
jawk() {
    local good="${1:-0}"
    local bad="${2:-0}"
    local meh="${3:-0}"

    # drop the (up to 3) conditions, so only the names of files remain
    if [ $# -gt 3 ]; then
        shift 3
    else
        shift $#
    fi

    awk '
        { lower = tolower($0); low = lower }

        '"${good}"' {
            # a color-blind-friendlier alternative to this green is the
            # blue from the ANSI style "\x1b[38;2;0;95;215m"
            gsub(/\x1b\[0m/, "&\x1b[38;2;0;135;95m")
            printf "\x1b[38;2;0;135;95m%s\x1b[0m\n", $0
            fflush()
            next
        }

        '"${bad}"' {
            gsub(/\x1b\[0m/, "&\x1b[38;2;204;0;0m")
            printf "\x1b[38;2;204;0;0m%s\x1b[0m\n", $0
            fflush()
            next
        }

        '"${meh}"' {
            gsub(/\x1b\[0m/, "&\x1b[38;2;168;168;168m")
            printf "\x1b[38;2;168;168;168m%s\x1b[0m\n", $0
            fflush()
            next
        }

        { print; fflush() }
    ' "$@"
}
1065 
# show a `dad` JOKE from the web, sometimes even a very funny one: the text
# fetched is line-wrapped, and has its trailing spaces removed
joke() {
    curl --show-error -s https://icanhazdadjoke.com |
        fold -s |
        awk '
            {
                sub(/ *\r?$/, "")
                print
            }
        '
}
1071 
# convert binary KiloBytes into bytes, one result per number given: results
# have up to 2 decimals, which are omitted when they are all zeros;
# underscores in numbers are ignored, and 1 is the default number
kb() {
    echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | awk '
        /./ {
            bytes = sprintf("%.2f", 1024 * $0)
            # leave out decimals which are all zeros
            sub(/\.00*$/, "", bytes)
            print bytes
            fflush()
        }
    '
}
1078 
# Line xARGS: `xargs` using line separators, which handles filepaths
# with spaces, as long as the standard input has 1 path per line; all
# arguments are handed over to `xargs`
largs() {
    awk -v ORS='\000' '
        {
            # ignore a leading UTF-8 byte-order mark, and trailing CRs
            if (FNR == 1) sub(/^\xef\xbb\xbf/, "")
            sub(/\r$/, "")
            print
            fflush()
        }
    ' | xargs -0 "$@"
}
1087 
# convert pounds (LB) into kilograms, one 2-decimal result per number given;
# underscores in numbers are ignored, and 1 is the default number
lb() {
    echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | awk '
        /./ {
            printf "%.2f\n", $0 * 0.45359237
            fflush()
        }
    '
}
1093 
# convert a mix of pounds (LB) and weight-ounces (OZ) into kilograms: the
# first argument is the pounds, the second is the ounces, both defaulting to
# 0; underscores in either are ignored; both values are pasted as they are
# into the AWK expression calculating the result
lboz() {
    local lb
    local oz
    lb="$(echo "${1:-0}" | sed 's-_--g')"
    oz="$(echo "${2:-0}" | sed 's-_--g')"
    awk 'BEGIN { print 0.45359237 * '"${lb}"' + 0.028349523 * '"${oz}"'; exit }'
}
1102 
# ensure lines are never accidentally joined across files, by always emitting
# a line-feed at the end of each line
lines() {
    awk '
        {
            print $0
            fflush()
        }
    ' "$@"
}
1106 
# regroup adjacent lines into n-item tab-separated lines, where n is the first
# argument; when n is 0 (the default) or negative, all input lines are joined
# into a single tab-separated line
lineup() {
    local n="${1:-0}"
    if [ $# -gt 0 ]; then
        shift
    fi

    if [ "$n" -le 0 ]; then
        awk '
            { printf "%s%s", (NR > 1) ? "\t" : "", $0; fflush() }
            END { if (NR > 0) print "" }
        ' "$@"
    else
        awk -v n="$n" '
            {
                # all items in a group but the first come right after a tab
                if (n > 1 && NR % n != 1) printf "\t"
                printf "%s", $0
                fflush()
                # end the current output line when its group is complete
                if (NR % n == 0) { print ""; fflush() }
            }

            # end the last output line, when its group was left incomplete
            END { if (NR % n != 0) print "" }
        ' "$@"
    fi
}
1128 
# LOwercase line, check (awk) COndition: on each success, the original line
# is output with its original letter-casing, as its lower-cased version is
# only a convenience meant for the condition; besides the current line being
# lower-cased for the condition, the original one is available as variables
# `line`, `orig`, and `original`
loco() {
    local cond="${1:-1}"
    if [ $# -gt 0 ]; then
        shift
    fi

    awk '
        {
            original = $0
            orig = original
            line = original
            lower = tolower(original)
            low = lower
            $0 = lower
        }

        '"${cond}"' { print line; fflush() }
    ' "$@"
}
1144 
# LOWercase all lines, using the AWK function `tolower`
low() {
    awk '
        {
            print tolower($0)
            fflush()
        }
    ' "$@"
}
1147 
# LOWERcase all lines, using the AWK function `tolower`
lower() {
    awk '
        {
            lowered = tolower($0)
            print lowered
            fflush()
        }
    ' "$@"
}
1150 
# only keep lines which match any of the regexes given; without regexes,
# lines with anything other than carriage-returns are kept; the exit code
# is 1 when no line is emitted
match() {
    awk '
        BEGIN {
            total = ARGC - 1
            # all arguments are regexes: none of them is a file to open
            for (k = 1; k <= total; k++) {
                regexes[k] = ARGV[k]
                delete ARGV[k]
            }
        }

        {
            for (k = 1; k <= total; k++) {
                if ($0 !~ regexes[k]) continue
                print
                fflush()
                emitted++
                next
            }
        }

        END { exit(emitted == 0) }
    ' "${@:-[^\r]}"
}
1169 
# MAX Width truncates lines up to the number of items given as the first
# argument, or up to 80 by default; output lines end with an ANSI reset-code,
# in case input lines use ANSI styles; trailing carriage-returns are ignored
maxw() {
    local maxwidth="${1:-80}"
    if [ $# -gt 0 ]; then
        shift
    fi

    awk -v maxw="${maxwidth}" '
        {
            sub(/\r$/, "")
            printf "%s\x1b[0m\n", substr($0, 1, maxw)
            fflush()
        }
    ' "$@"
}
1183 
# convert binary MegaBytes into bytes, one result per number given: results
# have up to 2 decimals, which are omitted when they are all zeros;
# underscores in numbers are ignored, and 1 is the default number
mb() {
    echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | awk '
        /./ {
            bytes = sprintf("%.2f", 1048576 * $0)
            # leave out decimals which are all zeros
            sub(/\.00*$/, "", bytes)
            print bytes
            fflush()
        }
    '
}
1190 
# convert MIles into kilometers, one 2-decimal result per number given;
# underscores in numbers are ignored, and 1 is the default number
mi() {
    echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | awk '
        /./ {
            printf "%.2f\n", $0 * 1.609344
            fflush()
        }
    '
}
1196 
# convert MIles² (squared) into kilometers², one 2-decimal result per number
# given; underscores in numbers are ignored, and 1 is the default number
mi2() {
    # each result is flushed right away, like the other unit-converters do
    echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' |
        awk '/./ { printf "%.2f\n", 2.5899881103360 * $0; fflush() }'
}
1202 
# MINimize DECimalS ignores all trailing decimal zeros in numbers, and even
# the decimal dots themselves, when decimals in a number are all zeros;
# trailing carriage-returns in lines are ignored
mindecs() {
    # both substitutions insist on runs of zeros being followed either by a
    # non-digit or by the end of the line: without that check, zeros in the
    # middle of decimals would be removed, turning 1.05 into 15 and 1.5001
    # into 1.51; the first one handles decimals ending with zeros which have
    # a non-zero digit before them, the second handles all-zero decimals
    awk '{ gsub(/\r$/, ""); print; fflush() }' "$@" |
        sed -u -E \
            -e 's-([0-9]+\.[0-9]*[1-9])0+([^0-9]|$)-\1\2-g' \
            -e 's-([0-9]+)\.0+([^0-9]|$)-\1\2-g'
}
1209 
# convert Miles Per Hour into kilometers per hour, one 2-decimal result per
# number given; underscores in numbers are ignored, and 1 is the default
mph() {
    # each result is flushed right away, like the other unit-converters do
    echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' |
        awk '/./ { printf "%.2f\n", 1.609344 * $0; fflush() }'
}
1215 
# Number all lines, counting from 1, with a tab right after each line number
n() {
    awk '
        {
            printf "%d\t%s\n", NR, $0
            fflush()
        }
    ' "$@"
}
1218 
# convert Nautical MIles into kilometers, one 2-decimal result per number
# given; underscores in numbers are ignored, and 1 is the default number
nmi() {
    echo "${@:-1}" | sed -E 's-_--g; s- +-\n-g' | awk '
        /./ {
            printf "%.2f\n", $0 * 1.852
            fflush()
        }
    '
}
1224 
# Nice Ps shows/lists all current processes shown by `ps`, which is run with
# the arguments given, or with `aux` by default: the result is shown in a
# pager, after a title line with the current date/time, with numbered lines
# grouped 5 at a time; lines whose first item is `root` are colored, and
# idle-looking values are dimmed; when `ps` fails, nothing is shown, and its
# exit code is returned
np() {
    local listing
    # listing="$(ps "${@:-auxf}")"
    listing="$(ps "${@:-aux}")" || return $?

    echo "${listing}" | awk '
        BEGIN {
            day = strftime("%a %b %d")
            now = strftime("%H:%M:%S")
            # printf "\x1b[7m%30s%s  %s%30s\x1b[0m\n\n", "", day, now, ""
            title = "\x1b[38;2;128;128;128m\x1b[7m%30s%s  %s%30s\x1b[0m\n\n"
            printf title, "", day, now, ""
        }

        # start each group of 5 processes with an empty line: since line 1
        # is numbered 0, groups start on input lines 2, 7, 12, and so on
        NR % 5 == 2 { print "" }

        $1 == "root" {
            blue = "\x1b[38;2;52;101;164m"
            sub(/^/, blue)
            # restart the color after each run of spaces
            gsub(/ +/, "&\x1b[0m" blue)
            sub(/$/, "\x1b[0m")
        }

        {
            dim = "\x1b[38;2;135;135;175m&\x1b[0m"
            gsub(/ \? /, dim)
            gsub(/0\.0/, dim)
            gsub(/0:00/, dim)
            printf "%3d  %s\n", NR - 1, $0
        }
    ' | less -JMKiCRS
}
1261 
# NULl-terminate LINES ends each input line with a null byte, instead of a
# line-feed byte; leading UTF-8 byte-order marks and trailing carriage-returns
# are ignored
nullines() {
    awk -v ORS='\000' '
        {
            if (FNR == 1) sub(/^\xef\xbb\xbf/, "")
            sub(/\r$/, "")
            print
            fflush()
        }
    ' "$@"
}
1270 
# Print Awk expression: show the result of the AWK expression given as the
# first argument, which is pasted as is into an AWK program; the default
# expression is 0
pa() {
    local expr="${1:-0}"
    awk 'BEGIN { print '"${expr}"'; exit }'
}
1273 
# Paragraph AWK runs `awk` in block/paragraph/multiline input-mode, where
# records are separated by empty lines, line-buffering its output; all
# arguments are handed over to `awk`
# NOTE(review): the option previously written as -F='' reaches awk as `-F=`,
# which makes `=` the field separator; that is kept as is here, but is worth
# confirming as the intended behavior
pawk() {
    stdbuf -oL awk -F '=' -v RS='' "$@"
}
1276 
# pick lines, using all the 1-based line-numbers given, emitting lines in the
# order of the numbers given: negative numbers count backward from the last
# line, and numbers not matching any line are ignored; no input is read when
# no numbers are given
picklines() {
    awk '
        BEGIN {
            wanted = ARGC - 1
            if (wanted == 0) exit 0
            # all arguments are line numbers: none of them is a file to open
            for (k = 1; k <= wanted; k++) {
                picks[k] = ARGV[k]
                delete ARGV[k]
            }
        }

        # all lines are remembered, since picks can come in any order
        { kept[NR] = $0 }

        END {
            for (k = 1; k <= wanted; k++) {
                pos = picks[k]
                if (pos < 0) pos += NR + 1
                if (pos > 0 && pos <= NR) print kept[pos]
            }
        }
    ' "$@"
}
1292 
# make text plain, by ignoring ANSI terminal styling
plain() {
    awk '
        {
            gsub(/\x1b\[[0-9;]*[A-Za-z]/, "")
            print
            fflush()
        }
    ' "$@"
}
1295 
# PREcede (input) MEMO, prepends a first highlighted line to stdin lines:
# that line has all arguments given, joined with single spaces, and is left
# out when no arguments are given
prememo() {
    awk '
        BEGIN {
            for (k = 1; k < ARGC; k++) {
                memo = (k == 1) ? ARGV[k] : memo " " ARGV[k]
                # arguments are text to show, not names of files to open
                delete ARGV[k]
            }
            if (ARGC > 1) printf "\x1b[7m%s\x1b[0m\n", memo
            fflush()
        }

        { print; fflush() }
    ' "$@"
}
1312 
# start by joining all arguments given as a tab-separated-items line of output,
# followed by all lines from stdin verbatim; the first line is left out when
# no arguments are given
pretsv() {
    awk '
        BEGIN {
            for (k = 1; k < ARGC; k++) {
                header = (k == 1) ? ARGV[k] : header "\t" ARGV[k]
                # arguments are items to show, not names of files to open
                delete ARGV[k]
            }
            if (ARGC > 1) printf "%s\n", header
            fflush()
        }

        { print; fflush() }
    ' "$@"
}
1329 
# show/list all current processes, as lines of tab-separated items: the first
# 10 runs of spaces on each line from `ps aux` are turned into single tabs
processes() {
    local listing
    # run `ps` all by itself, before any of the commands handling its output
    listing="$(ps aux)"
    echo "${listing}" | awk '
        # ignore the line for the very command which made the listing
        /ps aux$/ { next }

        {
            for (k = 1; k <= 10; k++) sub(/ +/, "\t")
            print
        }
    '
}
1338 
# Pick Tab Separated-Values, trying to match the column names given using the
# first line from stdin. Matching works by first trying exact matching, then
# case-insensitive matching, when there's no exact match, then trying names
# which are integers as 1-based column numbers (negative ones count backward
# from the last column), finally giving up when there's no match either way.
# Any name without a match is reported on stderr, and results in exit code 1
# without any output; nothing is emitted when no names are given.
ptsv() {
    awk -F "\t" '
        BEGIN {
            # all arguments are column names: none is a file to open
            for (i = 1; i < ARGC; i++) {
                colnames[++n] = ARGV[i]
                lownames[n] = tolower(ARGV[i])
                delete ARGV[i]
            }
        }

        # findcol gives the 1-based index of the column in the current line
        # which matches the name given, or 0 when no column matches it
        function findcol(name, lowname,    i) {
            for (i = 1; i <= NF; i++) if (name == $i) return i
            for (i = 1; i <= NF; i++) if (lowname == tolower($i)) return i

            # only integers can be column numbers: checking that avoids
            # string comparisons letting names such as 2x act as numbers
            if (name ~ /^[+-]?[0-9]+$/) {
                if (1 <= name + 0 && name + 0 <= NF) return name + 0
                if (name + 0 < 0 && -name <= NF) return NF + name + 1
            }

            return 0
        }

        { gsub(/\r$/, "") }

        NR == 1 {
            # loop over the names asked for, not over the columns available,
            # which may be more (or fewer) than the names given
            for (i = 1; i <= n; i++) {
                j = findcol(colnames[i], lownames[i])
                if (j > 0) pick[++numcols] = j

                if (j == 0) {
                    fmt = "\x1b[31mno column match for \"%s\"\x1b[0m\n"
                    printf(fmt, colnames[i]) > "/dev/stderr"
                    errors++
                }
            }

            if (errors > 0) exit 1
            if (numcols == 0) exit
        }

        {
            for (i = 1; i <= numcols; i++) {
                if (i > 1) printf "\t"
                printf "%s", $(pick[i])
            }

            printf "\n"; fflush()
        }
    ' "$@"
}
1391 
# Red AWK styles lines satisfying an AWK condition/expression red, keeping all
# other lines the same; without a condition, all lines are styled; conditions
# can use variables `low` and `lower`, which hold the lower-cased current line
rawk() {
    local cond="${1:-1}"
    if [ $# -gt 0 ]; then
        shift
    fi

    awk '
        { lower = tolower($0); low = lower }

        '"${cond}"' {
            # restart the style after each ANSI reset found in the line
            gsub(/\x1b\[0m/, "&\x1b[38;2;204;0;0m")
            printf "\x1b[38;2;204;0;0m%s\x1b[0m\n", $0
            fflush()
            next
        }

        { print; fflush() }
    ' "$@"
}
1410 
# Realign all detected columns, right-aligning any detected numbers in any
# column: numbers in the same column end up with their decimal dots lined up.
# Items are space-separated, unless a file has tabs on its first line, in
# which case items are tab-separated. Output columns are padded with spaces,
# and have 2-space gaps between them. All input is read before any output.
realign() {
    awk '
        function match_number(v) {
            return match(v, /^[+-]?[0-9]+(\.[0-9]+)?$/)
        }

        function match_dot_digits(v) {
            return match(v, /\.[0-9]+$/)
        }

        # spaces makes a string of n spaces, which is empty when n < 1
        function spaces(n) {
            return (n >= 1) ? sprintf("%" int(n) "s", "") : ""
        }

        { gsub(/\r$/, "") }

        FNR == 1 {
            FS = ($0 ~ /\t/) ? "\t" : " "
            $0 = $0
        }

        {
            # remember how many items each line has, as well as the most
            # items any line has: the value of NF available when the input
            # is over only describes the last line, which may be a short one
            numitems[NR] = NF
            if (maxitems < NF) maxitems = NF

            for (i = 1; i <= NF; i++) {
                # comma-separated subscripts work on all awk implementations
                data[NR, i] = $i

                if (match_number($i)) {
                    if (match_dot_digits($i)) {
                        dd = RLENGTH
                        if (dot_decs[i] < dd) dot_decs[i] = dd
                        iw = RSTART - 1
                        if (int_widths[i] < iw) int_widths[i] = iw
                    } else {
                        w = length($i)
                        if (int_widths[i] < w) int_widths[i] = w
                    }

                    continue
                }

                w = length($i)
                if (widths[i] < w) widths[i] = w
            }
        }

        END {
            # fix column-widths using the number-padding info
            for (i = 1; i <= maxitems; i++) {
                w = int_widths[i] + dot_decs[i]
                if (widths[i] < w) widths[i] = w
            }

            for (i = 1; i <= NR; i++) {
                last = numitems[i]

                for (j = 1; j <= last; j++) {
                    if (j > 1) printf "  " # put 2-space gaps between columns

                    v = data[i, j]

                    if (!match_number(v)) {
                        printf "%s", v
                        # avoid adding trailing spaces at the end of lines
                        if (j < last) printf "%s", spaces(widths[j] - length(v))
                        continue
                    }

                    w = length(v)
                    if (match_dot_digits(v)) {
                        dd = RLENGTH
                        iw = RSTART - 1
                    } else {
                        dd = 0
                        iw = w
                    }

                    dpad = dot_decs[j] - dd
                    ipad = int_widths[j] - iw
                    if (ipad < 0) ipad = 0
                    lpad = widths[j] - (ipad + w + dpad)
                    if (lpad < 0) lpad = 0

                    # avoid adding trailing spaces at the end of lines
                    if (j == last) dpad = 0

                    printf "%s%s%s", spaces(lpad + ipad), v, spaces(dpad)
                }

                printf "\n"
            }
        }
    ' "$@"
}
1500 
# reflow/trim lines of prose (text) to improve its legibility: it's especially
# useful when the text is pasted from web-pages being viewed in reader mode;
# lines are wrapped at spaces to fit the max width given as the first argument
# (80 by default), trailing spaces are removed, and an empty line is put
# between the contents of different files
reprose() {
    local width="${1:-80}"
    if [ $# -gt 0 ]; then
        shift
    fi

    awk '
        # separate the lines of each file from the previous one
        NR > 1 && FNR == 1 { print "" }

        {
            sub(/\r$/, "")
            print
            fflush()
        }
    ' "$@" |
        fold -s -w "${width}" |
        sed -u -E 's- *\r?$--'
}
1511 
# Realign Tab Separated-Values, by padding with spaces to match each column's
# widest value, right-aligning all numbers: numbers in the same column end up
# with their decimal dots lined up. Output columns have 2-space gaps between
# them, instead of tabs. All input is read before any output.
rtsv() {
    awk -F "\t" '
        function match_number(v) {
            return match(v, /^[+-]?[0-9]+(\.[0-9]+)?$/)
        }

        function match_dot_digits(v) {
            return match(v, /\.[0-9]+$/)
        }

        # spaces makes a string of n spaces, which is empty when n < 1
        function spaces(n) {
            return (n >= 1) ? sprintf("%" int(n) "s", "") : ""
        }

        {
            gsub(/\r$/, "")

            # remember how many items each line has, as well as the most
            # items any line has: the value of NF available when the input
            # is over only describes the last line, which may be a short one
            numitems[NR] = NF
            if (maxitems < NF) maxitems = NF

            for (i = 1; i <= NF; i++) {
                # comma-separated subscripts work on all awk implementations
                data[NR, i] = $i

                if (match_number($i)) {
                    if (match_dot_digits($i)) {
                        dd = RLENGTH
                        if (dot_decs[i] < dd) dot_decs[i] = dd
                        iw = RSTART - 1
                        if (int_widths[i] < iw) int_widths[i] = iw
                    } else {
                        w = length($i)
                        if (int_widths[i] < w) int_widths[i] = w
                    }

                    continue
                }

                w = length($i)
                if (widths[i] < w) widths[i] = w
            }
        }

        END {
            # fix column-widths using the number-padding info
            for (i = 1; i <= maxitems; i++) {
                w = int_widths[i] + dot_decs[i]
                if (widths[i] < w) widths[i] = w
            }

            for (i = 1; i <= NR; i++) {
                last = numitems[i]

                for (j = 1; j <= last; j++) {
                    if (j > 1) printf "  " # put 2-space gaps between columns

                    v = data[i, j]

                    if (!match_number(v)) {
                        printf "%s", v
                        # avoid adding trailing spaces at the end of lines
                        if (j < last) printf "%s", spaces(widths[j] - length(v))
                        continue
                    }

                    w = length(v)
                    if (match_dot_digits(v)) {
                        dd = RLENGTH
                        iw = RSTART - 1
                    } else {
                        dd = 0
                        iw = w
                    }

                    dpad = dot_decs[j] - dd
                    ipad = int_widths[j] - iw
                    if (ipad < 0) ipad = 0
                    lpad = widths[j] - (ipad + w + dpad)
                    if (lpad < 0) lpad = 0

                    # avoid adding trailing spaces at the end of lines
                    if (j == last) dpad = 0

                    printf "%s%s%s", spaces(lpad + ipad), v, spaces(dpad)
                }

                printf "\n"
            }
        }
    ' "$@"
}
1596 
# Show Files (and folders), coloring folders and links: each argument given
# (the current folder by default) is listed using `ls`, under a highlighted
# line showing its full path; entries are numbered, and grouped 5 at a time;
# the whole result is shown using a pager
sf() {
    local path
    local shown=0
    local options

    # showing at most 1 listing also asks the pager for a 1-line header
    case $# in
        0 | 1) options='--header=1 -JMKiCRS' ;;
        *) options='-JMKiCRS' ;;
    esac

    for path in "${@:-.}"; do
        # separate listings from each other with an empty line
        if [ "${shown}" -gt 0 ]; then
            printf "\n"
        fi
        shown=1

        printf "\e[7m%s\e[0m\n\n" "$(realpath "${path}")"

        ls -al --file-type --color=never --time-style iso "${path}" | awk '
            BEGIN {
                back = "\x1b[48;2;228;228;228m"
                folder = "\x1b[38;2;0;135;255m" back "&\x1b[0m"
                link = "\x1b[38;2;0;135;95m" back "&\x1b[0m"
            }

            # skip the first 3 lines, which for folders are presumably the
            # total-size line, followed by the `.` and `..` entries
            NR < 4 { next }

            { idx = NR - 3 }

            idx > 1 && idx % 5 == 1 { print "" }

            {
                sub(/^(d[rwx-]+)/, folder)
                sub(/^(l[rwx-]+)/, link)
                printf "%6d  %s\n", idx, $0
                fflush()
            }
        '
    done | less "${options}"
}
1629 
# Show File Sizes, for all the files given as arguments: files are shown in
# decreasing order of size, along with their sizes in bytes and in MiB; the
# result is shown using a pager
sfs() {
    # emit all filenames null-terminated, get their byte-counts using `wc`,
    # and reverse-sort the results numerically
    printf "%s\x00" "$@" |
        xargs -0 wc -c |
        sort -rn |
        awk '
            # start output with a header-like line
            BEGIN {
                printf "%6s  %10s  %8s  name\n", "n", "bytes", "MiB"
                fflush()
            }

            # make the table breathe with empty lines, so tall outputs stay
            # readable: since line 1 is numbered 0, groups of 5 lines start
            # on input lines 2, 7, 12, and so on
            NR % 5 == 2 { print "" }

            {
                row = sprintf("%6d  %10d  %8.2f  ", NR - 1, $1, $1 / 1048576)

                # the name is all that follows the (likely space-padded)
                # first item, since filepaths can have spaces in them
                name = $0
                sub(/^ */, "", name)
                name = substr(name, length($1) + 1)
                sub(/^ /, "", name)
                row = row name

                # make all-zero decimal values stand out with a special color
                gsub(/ 00*\.00* /, "\x1b[38;2;135;135;175m&\x1b[0m", row)
                print row
                fflush()
            }
        ' |
        less -JMKiCRS
}
1664 
# SHell-QUOTE each line from the input(s): this is useful to make lines of
# single-filepaths compatible with `xargs`, since standard shell settings
# get in the way of filepaths with spaces and other special symbols in them;
# lines are emitted double-quoted, with backslashes put before symbols which
# are special even inside double-quotes
shquote() {
    awk '
        {
            sub(/\r$/, "")
            # backslashes must be escaped before all other symbols, so the
            # ones added by later escapes are left alone
            gsub(/\\/, "\\\\")
            gsub(/"/, "\\\"")
            gsub(/\$/, "\\$")
            gsub(/`/, "\\`")
            printf "\"%s\"\n", $0
            fflush()
        }
    ' "$@"
}
1681 
# emit the first line from stdin as is, sorting all lines after that, using
# the `sort` command, passing all/any arguments/options to it; a leading
# UTF-8 byte-order mark and trailing carriage-returns are ignored
sortrest() {
    awk -v sort="sort $*" '
        # only stdin is ever read, so line 1 is always the first line
        NR == 1 {
            sub(/^\xef\xbb\xbf/, "")
            sub(/\r$/, "")
            print
            fflush()
            next
        }

        {
            sub(/\r$/, "")
            print | sort
        }
    '
}
1692 
# SORt Tab-Separated Values: emit the first line from stdin as is, sorting all
# lines after that, using the `sort` command in TSV (tab-separated values)
# mode, passing all/any arguments/options to it; a leading UTF-8 byte-order
# mark and trailing carriage-returns are ignored
sortsv() {
    local tab
    tab="$(printf '\t')"

    awk -v sort="sort -t \"${tab}\" $*" '
        # only stdin is ever read, so line 1 is always the first line
        NR == 1 {
            sub(/^\xef\xbb\xbf/, "")
            sub(/\r$/, "")
            print
            fflush()
            next
        }

        {
            sub(/\r$/, "")
            print | sort
        }
    '
}
1704 
# ignore leading spaces, trailing spaces, even runs of multiple spaces
# in the middle of lines, as well as trailing carriage returns; spaces
# around tabs and leading UTF-8 byte-order marks are ignored as well
squeeze() {
    awk '
        {
            if (FNR == 1) sub(/^\xef\xbb\xbf/, "")
            sub(/^ +/, "")
            sub(/ *\r?$/, "")
            gsub(/ *\t */, "\t")
            gsub(/  +/, " ")
            print
            fflush()
        }
    ' "$@"
}
1718 
# SQUeeze and stOMP, by ignoring leading spaces, trailing spaces, even runs
# of multiple spaces in the middle of lines, as well as trailing carriage
# returns, while also turning runs of empty lines into single empty lines,
# and ignoring leading/trailing empty lines, effectively also `squeezing`
# lines vertically
squomp() {
    awk '
        {
            if (FNR == 1) sub(/^\xef\xbb\xbf/, "")

            # empty lines are only emitted later, and only if other lines
            # both precede and follow them
            if ($0 ~ /^\r?$/) { pending = 1; next }
            if (pending && emitted > 0) print ""
            pending = 0

            sub(/^ +/, "")
            sub(/ *\r?$/, "")
            gsub(/ *\t */, "\t")
            gsub(/  +/, " ")
            print
            fflush()
            emitted++
        }
    ' "$@"
}
1738 
# turn runs of empty lines into single empty lines, effectively squeezing
# paragraphs vertically, so to speak; runs of empty lines both at the start
# and at the end are ignored
stomp() {
    awk '
        {
            # empty lines are only emitted later, and only if other lines
            # both precede and follow them
            if ($0 ~ /^\r?$/) { pending = 1; next }
            if (pending && emitted > 0) print ""
            pending = 0

            print
            fflush()
            emitted++
        }
    ' "$@"
}
1749 
# STRike-thru (lines) with AWK: lines satisfying the AWK condition given as
# the first argument are shown struck-thru, all other lines are emitted as
# they are; without a condition, all lines are struck-thru; conditions can
# use variables `low` and `lower`, which hold the lower-cased current line
strawk() {
    local cond="${1:-1}"
    if [ $# -gt 0 ]; then
        shift
    fi

    awk '
        { lower = tolower($0); low = lower }

        '"${cond}"' {
            # restart the style after each ANSI reset found in the line
            gsub(/\x1b\[0m/, "&\x1b[9m")
            printf "\x1b[9m%s\x1b[0m\n", $0
            fflush()
            next
        }

        { print; fflush() }
    ' "$@"
}
1764 
# Sort Tab-Separated Values: emit the first line from stdin as is, sorting all
# lines after that, using the `sort` command in TSV (tab-separated values)
# mode, passing all/any arguments/options to it; a leading UTF-8 byte-order
# mark and trailing carriage-returns are ignored
stsv() {
    local tab
    tab="$(printf '\t')"

    awk -v sort="sort -t \"${tab}\" $*" '
        { sub(/\r$/, "") }

        # only stdin is ever read, so line 1 is always the first line
        NR == 1 {
            sub(/^\xef\xbb\xbf/, "")
            print
            fflush()
            next
        }

        { print | sort }
    '
}
1776 
# use the result of the `awk` function `substr` for each line: the first
# argument is the 1-based start position (1 by default), the second one is
# the max length of the results (80 by default)
substr() {
    local start="${1:-1}"
    local length="${2:-80}"

    # drop the (up to 2) leading arguments, so only names of files remain
    if [ $# -gt 2 ]; then
        shift 2
    else
        shift $#
    fi

    awk -v start="${start}" -v len="${length}" '
        {
            print substr($0, start, len)
            fflush()
        }
    ' "$@"
}
1786 
# append a final Tab-Separated-Values line with the sums of all columns from
# the input table(s) given; items from first lines aren't counted/added; all
# input lines are emitted as they are read, ignoring trailing carriage-returns
sumtsv() {
    awk -F "\t" '
        # FNR == 1 { gsub(/^\xef\xbb\xbf/, "") }

        {
            sub(/\r$/, "")
            print
            fflush()

            # the final line has as many items as the widest line read
            if (NF > width) width = NF

            if (FNR > 1) {
                for (k = 1; k <= NF; k++) totals[k] += $k + 0
            }
        }

        END {
            for (k = 1; k <= width; k++) {
                printf "%s%s", (k > 1) ? "\t" : "", totals[k] ""
            }
            if (width > 0) printf "\n"
        }
    ' "$@"
}
1810 
# show a reverse-sorted tally of all lines read, where ties are sorted
# alphabetically: output is lines of tab-separated values, starting with a
# header line; trailing carriage-returns in input lines are ignored
tally() {
    local tab
    tab="$(printf '\t')"

    awk -v sortcmd="sort -t \"${tab}\" -rnk2 -k1d" '
        # reassure users by instantly showing the header
        BEGIN {
            print "value\ttally"
            fflush()
        }

        {
            sub(/\r$/, "")
            counts[$0] += 1
        }

        END {
            for (value in counts) {
                printf "%s\t%d\n", value, counts[value] | sortcmd
            }
        }
    ' "$@"
}
1821 
1822 # Tab AWK: TSV-specific I/O settings for `awk`
1823 # tawk() { awk -F "\t" -v OFS="\t" "$@"; }
1824 
# Tab AWK: TSV-specific I/O settings for `awk`, where tabs separate items
# both on input lines and on output lines; output is line-buffered, and all
# arguments are handed over to `awk`
tawk() {
    stdbuf -oL awk -F '\t' -v 'OFS=\t' "$@"
}
1827 
# Titled conCATenate Lines highlights each filename, before emitting its
# lines; leading UTF-8 byte-order marks and trailing carriage-returns are
# ignored
tcatl() {
    awk '
        {
            if (FNR == 1) {
                printf "\x1b[7m%s\x1b[0m\n", FILENAME
                fflush()
                sub(/^\xef\xbb\xbf/, "")
            }

            sub(/\r$/, "")
            print
            fflush()
        }
    ' "$@"
}
1836 
# simulate the cadence of old-fashioned teletype machines, by slowing down
# the output of ASCII/UTF-8 symbols from the standard-input: there is a short
# pause before each symbol, and a longer one before the end of each line;
# when a pause fails, output stops right away, using the exit code from it
teletype() {
    awk '
        {
            sub(/\r$/, "")

            total = length($0)
            for (pos = 1; pos <= total; pos++) {
                status = system("sleep 0.015")
                if (status != 0) exit status
                printf "%s", substr($0, pos, 1)
                fflush()
            }

            status = system("sleep 0.75")
            if (status != 0) exit status
            printf "\n"
            fflush()
        }

        # END { if (NR > 0 && status != 0) printf "\n" }
    ' "$@"
}
1856 
# lowercase all lines, using the AWK function of the same name
tolower() {
    awk '
        {
            $0 = tolower($0)
            print
            fflush()
        }
    ' "$@"
}
1859 
# get the processes currently using the most cpu: the first argument is the
# max number of processes to show (10 by default), and nothing is shown
# unless it's a positive integer; the header line from `ps` comes first
topcpu() {
    local n="${1:-10}"
    [ "$n" -gt 0 ] || return $?

    ps aux |
        awk '
            NR == 1 { print; fflush(); next }
            # reverse-sort all other lines numerically, by their 3rd column
            { print | "sort -rnk3" }
        ' |
        head -n "$((n + 1))"
}
1868 
# get the processes currently using the most memory: the first argument is
# the max number of processes to show (10 by default), and nothing is shown
# unless it's a positive integer; the header line from `ps` comes first
topmemory() {
    local n="${1:-10}"
    [ "$n" -gt 0 ] || return $?

    ps aux |
        awk '
            NR == 1 { print; fflush(); next }
            # reverse-sort all other lines numerically, by their 6th column
            { print | "sort -rnk6" }
        ' |
        head -n "$((n + 1))"
}
1877 
# transpose (switch) rows and columns from tables; lines are split on tabs
# when the first line of an input has any, or on spaces otherwise, and the
# result is always tab-separated
transpose() {
    awk '
        # ignore trailing carriage-returns, as found in CRLF lines
        { sub(/\r$/, "") }

        # each input picks its field-separator from its first line: changing
        # FS only affects later lines, so the current one is split again
        FNR == 1 { FS = ($0 ~ /\t/) ? "\t" : " "; $0 = $0 }

        {
            # comma-separated subscripts are used instead of gawk-only arrays
            # of arrays, so this works with any POSIX-compatible awk
            for (i = 1; i <= NF; i++) items[i, NR] = $i
            if (maxitems < NF) maxitems = NF
        }

        END {
            for (j = 1; j <= maxitems; j++) {
                for (i = 1; i <= NR; i++) {
                    if (i > 1) printf "\t"
                    # items missing from shorter lines come out as empty
                    printf "%s", items[j, i]
                }
                printf "\n"
            }
        }
    ' "$@"
}
1901 
# Underline (lines) with AWK: lines satisfying the AWK condition given as the
# first argument (all lines, when no condition is given) are underlined, and
# all other lines are emitted as they are; conditions can also use variables
# `low` and `lower`, which both hold the lowercased current line; all later
# arguments are given to `awk`
uawk() {
    local condition="${1:-1}"
    if [ $# -gt 0 ]; then
        shift
    fi

    awk '
        { lower = tolower($0); low = lower }

        '"${condition}"' {
            # restart underlining right after each ANSI-style reset
            gsub(/\x1b\[0m/, "\x1b[0m\x1b[4m")
            printf "\x1b[4m%s\x1b[0m\n", $0
            fflush()
            next
        }

        { print; fflush() }
    ' "$@"
}
1916 
# Underline Every few lines: a shorter name for `zebra`, which makes groups
# of 5 lines (by default) stand out by underlining the last line of each
alias ue='zebra'
1920 
# deduplicate lines, keeping them in their original order
unique() {
    awk '
        # lines already seen are skipped
        ($0 in seen) { next }

        # remember each new line, while emitting it
        { seen[$0] = 1; print; fflush() }
    ' "$@"
}
1923 
# convert United States Dollars into CAnadian Dollars, using the latest
# official exchange rates from the bank of canada; during weekends, the
# latest rate may be from a few days ago; the default amount of usd to
# convert is 1, when not given; underscores in the amount are ignored, so
# they can be used as digit-group separators
usd2cad() {
    local site='https://www.bankofcanada.ca/valet/observations/group'
    local since
    since="$(date -d '3 days ago' +'%Y-%m-%d')"
    local url="${site}/FX_RATES_DAILY/csv?start_date=${since}"
    local amount
    amount="$(echo "${1:-1}" | sed 's/_//g')"

    curl -s "${url}" | awk -F, -v amount="${amount}" '
        # remember the (last) column mentioning USD
        /USD/ {
            for (col = 1; col <= NF; col++) {
                if ($col ~ /USD/) usdcol = col
            }
        }

        # the last line read is the one used for the conversion
        END {
            gsub(/"/, "", $usdcol)
            if (usdcol != 0) printf "%.2f\n", amount * $usdcol
        }
    '
}
1937 
# find all WEB/hyperLINKS (https:// and http://) in the input text, emitting
# each link found on its own line
weblinks() {
    awk '
        BEGIN { link = "https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*" }

        {
            # keep matching on whatever follows the latest link found
            rest = $0
            while (match(rest, link)) {
                print substr(rest, RSTART, RLENGTH)
                fflush()
                rest = substr(rest, RSTART + RLENGTH)
            }
        }
    ' "$@"
}
1950 
# run `xargs`, using whole lines from the standard input as extra arguments
# for the command given: lines are handed over null-terminated, so any
# spaces/quotes in them are kept as they are
xl() {
    awk -v ORS='\000' '
        # ignore a leading UTF-8 BOM (byte-order mark), if present
        FNR == 1 { sub(/^\xef\xbb\xbf/, "") }

        {
            # ignore trailing carriage-returns, as found in CRLF lines
            sub(/\r$/, "")
            print
            fflush()
        }
    ' | xargs -0 "$@"
}
1958 
# underline every few lines: make groups of lines stand out by underlining
# the last line of each; the optional first argument is the group size, which
# is 5 by default, while all later arguments are given to `awk`
zebra() {
    local n="${1:-5}"
    if [ $# -gt 0 ]; then
        shift
    fi

    awk -v n="${n}" '
        # a group-size of 0 would cause a division by zero
        BEGIN { if (n == 0) n = -1 }

        {
            if (NR % n == 0 && NR != 1) {
                # restart underlining right after each ANSI-style reset
                gsub(/\x1b\[0m/, "\x1b[0m\x1b[4m")
                printf("\x1b[4m%s\x1b[0m\n", $0)
            } else {
                print
            }
            fflush()
        }
    ' "$@"
}
1973 }