File: pick.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # pick [options...] [columns...]
  27 #
  28 # Pick 1 or more fields from each line using any mix of 1-based indices, 0,
  29 # negative indices (which count backward from the last item), or even column
  30 # names, which are matched to indices using items from the first/header line
  31 # from the input.
  32 #
  33 # Output is always lines of tab-separated items.
  34 #
  35 # Name-matching is first tried exactly but, failing that, case-insensitive
  36 # matching is tried as a fallback.
  37 #
  38 # Indices can also be 0, which picks all columns: giving 0 as the only index
  39 # to this script is a convenient way to turn aligned column-like lines into
  40 # proper TSV lines.
  41 #
  42 # To pick columns from TSV (tab-separated values) lines/input, use any among
  43 # these as the leading option: `-t`, `-tab`, `-tabs`, or `-tsv`.
  44 #
  45 # Double-dashed variants of all supported options are also supported.
  46 
  47 
  48 # handle leading options
  49 tsv=0
  50 case "$1" in
  51     -h|--h|-help|--help)
  52         awk '/^# +pick/, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  53         exit 0
  54     ;;
  55     -t|--t|-tab|--tab|-tabs|--tabs|-tsv|--tsv)
  56         # enable TSV (tab-separated values) mode
  57         tsv=1
  58         shift
  59     ;;
  60 esac
  61 
  62 
  63 awk -v tsv="${tsv}" '
  64 BEGIN {
  65     if (tsv) FS = "\t"
  66     for (i = 1; i < ARGC; i++) {
  67         ind[i] = ARGV[i]
  68         delete ARGV[i]
  69     }
  70 }
  71 
  72 function error(msg) {
  73     printf "\x1b[31m%s\x1b[0m\n", msg > "/dev/stderr"
  74 }
  75 
  76 NR == 1 {
  77     width = NF
  78     if (NF == 0) {
  79         error("first/header line has no items")
  80         exit 1
  81     }
  82 
  83     errors = 0
  84     for (i in ind) {
  85         v = ind[i]
  86         # keep non-negative indices as given
  87         if (v ~ /^[0-9]+$/) continue
  88 
  89         # try to fix negative indices
  90         if (v < 0) {
  91             n = v + NF + 1
  92             if (n < 0) {
  93                 fmt = "%d is too negative; first/header line has %d items"
  94                 error(sprintf(fmt, v, NF))
  95                 errors++
  96             }
  97             ind[i] = n
  98             continue
  99         }
 100 
 101         # try to lookup indices for named columns
 102         found = 0
 103         for (j = 1; j <= NF; j++) {
 104             if (v == $j) {
 105                 ind[i] = j
 106                 found = 1
 107                 break
 108             }
 109         }
 110         if (found) continue
 111 
 112         # try to case-insensitively lookup indices for named columns
 113         l = tolower(v)
 114         for (j = 1; j <= NF; j++) {
 115             if (l == tolower($j)) {
 116                 ind[i] = j
 117                 found = 1
 118                 break
 119             }
 120         }
 121         if (found) continue
 122 
 123         error(sprintf("column named %s not found", v))
 124         errors++
 125     }
 126 
 127     if (errors > 0) exit 1
 128 }
 129 
 130 {
 131     for (i in ind) {
 132         if (i > 1) printf "\t"
 133 
 134         n = ind[i]
 135         # if (n < 0) n += NF + 1
 136         if (n > 0) printf "%s", $n
 137 
 138         if (n == 0) {
 139             for (j = 1; j <= width; j++) {
 140                 if (j > 1) printf "\t"
 141                 printf "%s", $j
 142             }
 143         }
 144     }
 145     print ""
 146 }
 147 ' "$@"