File: tawk.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright (c) 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # tawk [options...] [awk expression...] [files...]
  27 #
  28 #
  29 # Tally via AWK group-counts lines using common results of the AWK expression
  30 # given. When not given any, whole lines are used. Results are tab-separated
  31 # lines each with a tally and its respective common transformed value.
  32 #
  33 # The handy case-insensitive shortcut options may cause this tool to fail,
  34 # if the main AWK tool installed doesn't support the special IGNORECASE
  35 # variable.
  36 #
  37 # The AWK options available only in single-dash versions are
  38 #
  39 #   -f fs, -F fs, -Ffs, -F=fs    make `fs` the field separator
  40 #
# The other options, available in both single and double-dash versions, are
  42 #
  43 #   -h, -help    show this help message
  44 #   -i, -ins     match regexes case-insensitively; may fail the default `awk`
  45 #   -tsv         split fields using tabs, same as using -F "\t"
  46 
  47 
  48 case "$1" in
  49     -h|--h|-help|--help)
  50         awk '/^# +tawk /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  51         exit 0
  52     ;;
  53 esac
  54 
  55 tsv=0
  56 case_insensitive=0
  57 command='awk'
  58 
  59 while [ $# -gt 0 ]; do
  60     if [ "$1" = "--" ]; then
  61         shift
  62         break
  63     fi
  64 
  65     case "$1" in
  66         -f|-F)
  67             shift
  68             if [ $# -eq 0 ]; then
  69                 printf "expected value after -F option\n" >&2
  70                 exit 1
  71             fi
  72             command="${command} -F $1"
  73             shift
  74             continue
  75         ;;
  76 
  77         -F*)
  78             command="${command} $1"
  79             shift
  80             continue
  81         ;;
  82 
  83         -i|--i|-ins|--ins|-insensitive|--insensitive)
  84             case_insensitive=1
  85             shift
  86             continue
  87         ;;
  88 
  89         -tsv|--tsv)
  90             tsv=1
  91             shift
  92             continue
  93         ;;
  94     esac
  95 
  96     break
  97 done
  98 
  99 code="${1:-\$0}"
 100 [ $# -gt 0 ] && shift
 101 
 102 # show all non-existing files given
 103 failed=0
 104 for arg in "$@"; do
 105     if [ "${arg}" = "-" ]; then
 106         continue
 107     fi
 108     if [ ! -e "${arg}" ]; then
 109         printf "no file named \"%s\"\n" "${arg}" >&2
 110         failed=1
 111     fi
 112 done
 113 
 114 if [ "${failed}" -gt 0 ]; then
 115     exit 2
 116 fi
 117 
 118 ci='
 119     BEGIN {
 120         if (IGNORECASE == "") {
 121             m = "your `awk` command lacks case-insensitive regex-matching"
 122             print(m) > "/dev/stderr"
 123             exit 125
 124         }
 125         IGNORECASE = 1
 126     }
 127 '
 128 if [ "${case_insensitive}" -eq 0 ]; then
 129     ci=''
 130 fi
 131 
 132 src="${ci}"'
 133     BEGIN { print "tally\tvalue"; fflush() }
 134 
 135     {
 136         v = ('"${code}"')
 137         if (!tally[v]++) ordkeys[++oklen] = v
 138     }
 139 
 140     END {
 141         for (i = 1; i <= oklen; i++) {
 142             k = ordkeys[i]
 143             printf "%d\t%s\n", tally[k], k | _SORT_CMD
 144         }
 145     }
 146 '
 147 
 148 if [ "${tsv}" -eq 1 ]; then
 149     ${command} -F "\t" -v _SORT_CMD="sort -t '\t' -rnk1,1" "${src}" "$@"
 150 else
 151     ${command} -v _SORT_CMD="sort -t '\t' -rnk1,1" "${src}" "$@"
 152 fi