File: grawk.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright (c) 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # grawk [options...] [awk expression...] [filenames...]
  27 #
  28 #
  29 # GRoup via AWK groups lines using common results of the AWK expression given.
  30 # When not given any, whole lines are used, which effectively puts identical
  31 # lines next to each other.
  32 #
  33 # The handy case-insensitive shortcut options may cause this tool to fail,
  34 # if the main AWK tool installed doesn't support the special IGNORECASE
  35 # variable.
  36 #
  37 # The AWK options available only in single-dash versions are
  38 #
  39 #   -F fs               use `fs` for the field separator (the `FS` variable)
  40 #   -V                  show the AWK version installed
  41 #   -v var=val          set a variable to a given value
  42 #
# The other options, available in both single and double-dash versions, are
  44 #
  45 #   -h, -help             show this help message
  46 #   -ins, -insensitive    match regexes case-insensitively; fail if unsupported
  47 #   -tsv                  split fields using tabs, same as using -F "\t"
  48 
  49 
  50 case "$1" in
  51     -h|--h|-help|--help)
  52         awk '/^# +grawk /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  53         exit 0
  54     ;;
  55 esac
  56 
  57 command='awk'
  58 if { [ -p /dev/stdout ] || [ -t 1 ]; } && [ -e /usr/bin/stdbuf ]; then
  59     command='stdbuf -oL awk'
  60 fi
  61 
  62 tsv=0
  63 case_insensitive=0
  64 
  65 while [ $# -gt 0 ]; do
  66     arg="$1"
  67 
  68     if [ "${arg}" = "--" ]; then
  69         shift
  70         break
  71     fi
  72 
  73     case "${arg}" in
  74         -F)
  75             shift
  76             if [ $# -eq 0 ]; then
  77                 printf "expected value after -F option\n" >&2
  78                 exit 1
  79             fi
  80             command="${command} -F $1"
  81             shift
  82             continue
  83         ;;
  84 
  85         -F*)
  86             command="${command} ${arg}"
  87             shift
  88             continue
  89         ;;
  90 
  91         -v)
  92             shift
  93             if [ $# -eq 0 ]; then
  94                 printf "expected variable assignment after -v option\n" >&2
  95                 exit 1
  96             fi
  97             command="${command} -v $1"
  98             shift
  99             continue
 100         ;;
 101 
 102         -ins|--ins|-insensitive|--insensitive)
 103             case_insensitive=1
 104             shift
 105             continue
 106         ;;
 107 
 108         -tsv|--tsv)
 109             tsv=1
 110             shift
 111             continue
 112         ;;
 113 
 114         -*)
 115             command="${command} ${arg}"
 116             shift
 117             continue
 118         ;;
 119     esac
 120 
 121     break
 122 done
 123 
 124 code="${1:-\$0}"
 125 [ $# -gt 0 ] && shift
 126 
 127 # show all non-existing files given
 128 failed=0
 129 for arg in "$@"; do
 130     if [ "${arg}" = "-" ]; then
 131         continue
 132     fi
 133     if [ ! -e "${arg}" ]; then
 134         printf "no file named \"%s\"\n" "${arg}" > /dev/stderr
 135         failed=1
 136     fi
 137 done
 138 
 139 if [ "${failed}" -gt 0 ]; then
 140     exit 2
 141 fi
 142 
 143 ci='
 144     BEGIN {
 145         if (IGNORECASE == "") {
 146             m = "your `awk` command lacks case-insensitive regex-matching"
 147             printf("\x1b[38;2;204;0;0m%s\x1b[0m\n", m) > "/dev/stderr"
 148             exit 125
 149         }
 150         IGNORECASE = 1
 151     }
 152 '
 153 if [ "${case_insensitive}" -eq 0 ]; then
 154     ci=''
 155 fi
 156 
 157 src="${ci}"'
 158     FNR == 1 { FS = /\t/ ? "\t" : " "; $0 = $0 }
 159 
 160     {
 161         k = '"${code}"'
 162         if (!(k in groups)) ordkeys[++oklen] = k
 163         groups[k][length(groups[k]) + 1] = $0
 164     }
 165 
 166     END {
 167         for (i = 1; i <= oklen; i++) {
 168             k = ordkeys[i]
 169             n = length(groups[k])
 170             for (j = 1; j <= n; j++) print groups[k][j]
 171         }
 172     }
 173 '
 174 
 175 if [ "${tsv}" -eq 1 ]; then
 176     ${command} -F "\t" "${src}" "$@"
 177 else
 178     ${command} "${src}" "$@"
 179 fi