File: ncol.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # ncol [options...] [filenames...]
  27 #
  28 # Nice COLumns realigns and styles data tables using ANSI color sequences. In
  29 # particular, all auto-detected numbers are styled so they're easier to read
  30 # at a glance. Input tables can be either lines of space-separated values or
  31 # tab-separated values, and are auto-detected using the first non-empty line.
  32 #
  33 # When not given filepaths to read data from, this tool reads from standard
  34 # input by default.
  35 #
  36 # The only option is the help option, using any of `-h`, `--h`, `-help`, or
  37 # `--help`.
  38 
  39 
  40 case "$1" in
  41     -h|--h|-help|--help)
  42         awk '/^# +ncol /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  43         exit 0
  44     ;;
  45 esac
  46 
  47 [ "$1" = "--" ] && shift
  48 
  49 # show all non-existing files given
  50 failed=0
  51 for arg in "$@"; do
  52     if [ "${arg}" = "-" ]; then
  53         continue
  54     fi
  55     if [ ! -e "${arg}" ]; then
  56         printf "no file named \"%s\"\n" "${arg}" > /dev/stderr
  57         failed=1
  58     fi
  59 done
  60 
  61 if [ "${failed}" -gt 0 ]; then
  62     exit 2
  63 fi
  64 
  65 awk '
  66     function match_number(v) {
  67         return match(v, /^[+-]?[0-9]+(\.[0-9]+)?$/)
  68     }
  69 
  70     function match_dot_digits(v) {
  71         return match(v, /\.[0-9]+$/)
  72     }
  73 
  74     # show_tiles uses `pad` and `decs` as local variables
  75     function show_tiles(i, l, j, v, pad, decs) {
  76         for (j = 1; j <= l; j++) {
  77             v = data[i][j]
  78 
  79             if (v == "") {
  80                 printf "%s", "\x1b[0m○"
  81                 continue
  82             }
  83 
  84             if (!match_number(v)) {
  85                 pad = v ~ /^ | $/
  86                 v = pad ? "\x1b[38;2;196;160;0m■" : "\x1b[38;2;128;128;128m■"
  87                 printf "%s", v
  88                 continue
  89             }
  90 
  91             if (v > 0) {
  92                 decs = match_dot_digits(v)
  93                 v = decs ? "\x1b[38;2;0;135;95m■" : "\x1b[38;2;0;95;0m■"
  94                 printf "%s", v
  95                 continue
  96             }
  97 
  98             if (v < 0) {
  99                 decs = match_dot_digits(v)
 100                 v = decs ? "\x1b[38;2;215;95;95m■" : "\x1b[38;2;204;0;0m■"
 101                 printf "%s", v
 102                 continue
 103             }
 104 
 105             printf "%s", "\x1b[38;2;0;95;215m■"
 106         }
 107 
 108         printf "\x1b[0m"
 109         for (j = l + 1; j <= num_cols; j++) {
 110             printf "%s", "×"
 111         }
 112         printf "\x1b[0m"
 113     }
 114 
 115     # show_number uses `dd`, `iw`, `dpad`, `ipad`, `lpad`, `style`, and `decs`
 116     # as local variables
 117     function show_number(v, j, last, w, dd, iw, dpad, ipad, lpad, style, decs) {
 118         if (match_dot_digits(v)) {
 119             dd = RLENGTH
 120             iw = RSTART - 1
 121         } else {
 122             dd = 0
 123             iw = w
 124         }
 125 
 126         dpad = dot_decs[j] - dd
 127         ipad = int_widths[j] - iw
 128         if (ipad < 0) ipad = 0
 129         lpad = widths[j] - (ipad + w + dpad)
 130         if (lpad < 0) lpad = 0
 131 
 132         # avoid adding trailing spaces at the end of lines
 133         if (j == last) dpad = 0
 134 
 135         if (v > 0) {
 136             decs = dot_decs[j] > 0
 137             style = decs ? "\x1b[38;2;0;135;95m" : "\x1b[38;2;0;95;0m"
 138         } else if (v < 0) {
 139             decs = dot_decs[j] > 0
 140             style = decs ? "\x1b[38;2;215;95;95m" : "\x1b[38;2;204;0;0m"
 141         } else {
 142             style = "\x1b[38;2;0;95;215m"
 143         }
 144 
 145         printf "%*s%*s%s%s\x1b[0m%*s", lpad, "", ipad, "", style, v, dpad, ""
 146     }
 147 
 148     # always ignore trailing carriage-returns
 149     { gsub(/\r$/, "") }
 150 
 151     # first non-empty line auto-detects whether input is SSV or TSV
 152     num_cols == 0 && /\t/ { FS = "\t"; $0 = $0 }
 153 
 154     # first non-empty line auto-detects number of table columns
 155     num_cols == 0 { num_cols = NF }
 156 
 157     num_cols > 0 {
 158         num_rows++;
 159 
 160         for (i = 1; i <= NF; i++) {
 161             data[num_rows][i] = $i
 162 
 163             if (match_number($i)) {
 164                 numbers[i]++
 165                 sums[i] += $i + 0
 166 
 167                 if (match_dot_digits($i)) {
 168                     dd = RLENGTH
 169                     if (dot_decs[i] < dd) dot_decs[i] = dd
 170                     iw = RSTART - 1
 171                     if (int_widths[i] < iw) int_widths[i] = iw
 172                 } else {
 173                     w = length($i)
 174                     if (int_widths[i] < w) int_widths[i] = w
 175                 }
 176 
 177                 continue
 178             }
 179 
 180             w = length($i)
 181             if (widths[i] < w) widths[i] = w
 182         }
 183     }
 184 
 185     END {
 186         # fix column-widths using number-padding info and the column-totals
 187         for (i = 1; i <= num_cols; i++) {
 188             if (numbers[i] > 0) {
 189                 decs = dot_decs[i] > 0 ? dot_decs[i] - 1 : 0
 190                 w = length(sprintf("%.*f", decs, sums[i]))
 191             } else {
 192                 w = 1
 193             }
 194             if (widths[i] < w) widths[i] = w
 195 
 196             w = int_widths[i] + dot_decs[i]
 197             if (widths[i] < w) widths[i] = w
 198         }
 199 
 200         for (i = 1; i <= num_rows; i++) {
 201             show_tiles(i, num_cols)
 202             printf "  "
 203 
 204             for (j = 1; j <= num_cols; j++) {
 205                 v = data[i][j]
 206 
 207                 # put 2-space gaps between columns
 208                 if (j > 1 && j < num_cols) printf "  "
 209                 else if (j == num_cols && v != "") printf "  "
 210 
 211                 if (!match_number(v)) {
 212                     # avoid adding trailing spaces at the end of lines
 213                     printf "%*s", (j == num_cols) ? 0 : -widths[j], v
 214                     continue
 215                 }
 216 
 217                 show_number(v, j, num_cols, length(v))
 218             }
 219 
 220             # treat extra columns as part of the last one
 221             last = length(data[i])
 222             for (j = num_cols + 1; j <= last; j++) printf " %s", data[i][j]
 223 
 224             printf "\n"
 225         }
 226 
 227         # show extra row with the column-sums
 228         if (num_cols > 0) printf "%*s", num_cols, ""
 229         for (i = 1; i <= num_cols; i++) {
 230             printf "  "
 231             if (numbers[i] > 0) {
 232                 decs = dot_decs[i] > 0 ? dot_decs[i] - 1 : 0
 233                 s = sprintf("%.*f", decs, sums[i])
 234                 show_number(s, i, num_cols, length(s))
 235             } else {
 236                 printf "%*s", -widths[i], "-"
 237             }
 238         }
 239         if (num_cols > 0) printf "\n"
 240     }
 241 ' "$@" | sed -E \
 242     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m\5\x1b[38;2;168;168;168m\6\x1b[0m\7-g' \
 243     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m\5\x1b[38;2;168;168;168m\6\x1b[0m-g' \
 244     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m\5-g' \
 245     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m-g' \
 246     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3-g' \
 247     -e 's-([0-9]{1,3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m-g'