File: sawk.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # sawk [awk expression...] [filenames...]
  27 #
  28 # Summarize via AWK calculates simple numeric statistics from the AWK
  29 # expression given, using each input line as a data source. Welford's
  30 # algorithm is used for improved accuracy.
  31 
  32 
  33 case "$1" in
  34     -h|--h|-help|--help)
  35         awk '/^# +sawk /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  36         exit 0
  37     ;;
  38 esac
  39 
  40 # an optional leading `--` is dropped; the argument after it, if any, is
  41 # the AWK expression to summarize, with `$0` as the default expression
  42 [ "$1" != "--" ] || shift
  43 code="${1:-\$0}"; [ $# -eq 0 ] || shift
  44 
  45 # summarize the values of the AWK expression in `code`, which is evaluated
  46 # once per line from the files named by the remaining arguments, or from
  47 # the standard input when no files are given; results are lines of
  48 # tab-separated name-value pairs, and the final `sed` step trims needless
  49 # trailing decimal zeros from them: its patterns are slash-delimited, since
  50 # a dash delimiter could be confused with the dashes in the digit ranges
  51 awk '
  52     # initialize stats
  53     BEGIN {
  54         inf = "+inf" + 0
  55 
  56         min = inf
  57         max = -inf
  58         count = 0
  59         sum = 0
  60         mean = 0
  61         meanSq = 0
  62         prod = 1
  63         lnSum = 0
  64 
  65         ints = 0
  66         pos = 0
  67         zero = 0
  68         neg = 0
  69     }
  70 
  71     # update numeric stats using the result from the expression given
  72     {
  73         v = '"${code}"'
  74         # force the result into a number: text which is not numeric counts
  75         # as 0 in arithmetic, but would be compared as a string by the
  76         # min, max, and sign checks below, making the stats inconsistent
  77         v += 0
  78 
  79         count++
  80         ints += v % 1 == 0
  81         min = min < v ? min : v
  82         max = max > v ? max : v
  83         sum += v
  84         prod *= v
  85         # logarithms are summed for the geometric mean: any value which is
  86         # not positive turns the sum into -inf, so no geomean is reported
  87         lnSum += v <= 0 ? -inf : log(v)
  88 
  89         if (v > 0) { pos++ }
  90         else if (v < 0) { neg++ }
  91         else if (v == 0) { zero++ }
  92 
  93         # advance welford`s algorithm
  94         d1 = v - mean
  95         mean += d1 / count
  96         d2 = v - mean
  97         meanSq += d1 * d2
  98     }
  99 
 100     # report final numeric stats
 101     END {
 102         # separate name-value pairs using tabs
 103         OFS = "\t"
 104 
 105         # decimals are formatted in full, even for integer values, since
 106         # all output lines are piped into a command which trims needless
 107         # trailing zeros
 108         print "numeric", count
 109         print "min", sprintf("%f", min)
 110         print "max", sprintf("%f", max)
 111         print "sum", sprintf("%f", sum)
 112         print "mean", sprintf("%f", mean)
 113 
 114         # the geometric mean is left empty unless all values are positive
 115         if (count > 0 && lnSum != -inf) {
 116             print "geomean", sprintf("%f", exp(lnSum / count))
 117         } else {
 118             print "geomean", ""
 119         }
 120 
 121         # the standard deviation is the population kind, dividing by count
 122         if (count > 0) {
 123             print "sd", sprintf("%f", sqrt(meanSq / count))
 124             print "product", sprintf("%g", prod)
 125         } else {
 126             print "sd", ""
 127             print "product", ""
 128         }
 129 
 130         print "integer", ints
 131         print "positive", pos
 132         print "zero", zero
 133         print "negative", neg
 134     }
 135 ' "$@" | sed -E 's/([0-9]+)\.0+$/\1/g; s/([0-9]+\.[0-9]*[1-9])0+$/\1/g'