File: pick.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # pick [options...] [columns...]
  27 #
  28 # Pick 1 or more fields from each line using any mix of 1-based indices, 0,
  29 # negative indices (which count backward from the last item), or even column
  30 # names, which are matched to indices using items from the first/header line
  31 # from the input.
  32 #
  33 # Output is always lines of tab-separated items.
  34 #
  35 # Name-matching is first tried exactly but, failing that, case-insensitive
  36 # matching is tried as a fallback.
  37 #
  38 # Indices can also be 0, which picks all columns: giving 0 as the only index
  39 # to this script is a convenient way to turn aligned column-like lines into
  40 # proper TSV lines.
  41 #
# To pick columns from TSV (tab-separated values) lines/input, use any among
# these as the leading option: `-t`, `-tab`, `-tabs`, or `-tsv`.
  44 #
  45 # Double-dashed variants of all supported options are also supported.
  46 
  47 
  48 # handle leading options
  49 tsv=0
  50 case "$1" in
  51     -h|--h|-help|--help)
  52         # show help message, extracting the info-comment at the start
  53         # of this file, and quit
  54         awk '/^# +pick/, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  55         exit 0
  56     ;;
  57     -t|--t|-tab|--tab|-tabs|--tabs|-tsv|--tsv)
  58         # enable TSV (tab-separated values) mode
  59         tsv=1
  60         shift
  61     ;;
  62 esac
  63 
  64 
  65 awk -v tsv="${tsv}" '
  66 BEGIN {
  67     if (tsv) FS = "\t"
  68     for (i = 1; i < ARGC; i++) {
  69         ind[i] = ARGV[i]
  70         delete ARGV[i]
  71     }
  72 }
  73 
  74 function error(msg) {
  75     printf "\x1b[31m%s\x1b[0m\n", msg > "/dev/stderr"
  76 }
  77 
  78 NR == 1 {
  79     width = NF
  80     if (NF == 0) {
  81         error("first/header line has no items")
  82         exit 1
  83     }
  84 
  85     errors = 0
  86     for (i in ind) {
  87         v = ind[i]
  88         # keep non-negative indices as given
  89         if (v ~ /^[0-9]+$/) continue
  90 
  91         # try to fix negative indices
  92         if (v < 0) {
  93             n = v + NF + 1
  94             if (n < 0) {
  95                 fmt = "%d is too negative; first/header line has %d items"
  96                 error(sprintf(fmt, v, NF))
  97                 errors++
  98             }
  99             ind[i] = n
 100             continue
 101         }
 102 
 103         # try to lookup indices for named columns
 104         found = 0
 105         for (j = 1; j <= NF; j++) {
 106             if (v == $j) {
 107                 ind[i] = j
 108                 found = 1
 109                 break
 110             }
 111         }
 112         if (found) continue
 113 
 114         # try to case-insensitively lookup indices for named columns
 115         l = tolower(v)
 116         for (j = 1; j <= NF; j++) {
 117             if (l == tolower($j)) {
 118                 ind[i] = j
 119                 found = 1
 120                 break
 121             }
 122         }
 123         if (found) continue
 124 
 125         error(sprintf("column named %s not found", v))
 126         errors++
 127     }
 128 
 129     if (errors > 0) exit 1
 130 }
 131 
 132 {
 133     for (i in ind) {
 134         if (i > 1) printf "\t"
 135 
 136         n = ind[i]
 137         # if (n < 0) n += NF + 1
 138         if (n > 0) printf "%s", $n
 139 
 140         if (n == 0) {
 141             for (j = 1; j <= width; j++) {
 142                 if (j > 1) printf "\t"
 143                 printf "%s", $j
 144             }
 145         }
 146     }
 147     print ""
 148 }
 149 ' "$@"