File: ncol.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright (c) 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # ncol [options...] [files...]
  27 #
  28 #
  29 # Nice COLumns realigns and styles data tables using ANSI color sequences. In
  30 # particular, all auto-detected numbers are styled so they're easier to read
  31 # at a glance. Input tables can be either lines of space-separated values or
  32 # tab-separated values, and are auto-detected using the first non-empty line.
  33 #
  34 # When not given filepaths to read data from, this tool reads from standard
  35 # input by default.
  36 #
  37 # For positive numbers, a colorblind-friendly blue is used instead of green
  38 # if either environment variable COLORBLIND or COLOR_BLIND is declared and set
  39 # to 1.
  40 #
# The options, available both in single and double-dash versions, are
  42 #
  43 #    -h, -help           show this help message
  44 #    -m, -max-columns    use the row with the most items for the item-count
  45 
  46 
  47 maxcols=0
  48 
  49 for arg in "$@"; do
  50     if [ "${arg}" = '--' ]; then
  51         shift
  52         continue
  53     fi
  54 
  55     case "${arg}" in
  56         -h|--h|-help|--help)
  57             awk '/^# +ncol /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  58             exit 0
  59         ;;
  60 
  61         -m|--m|-maxcols|--maxcols|-maxcolumns|--maxcolumns|-max-columns|\
  62         --max-columns)
  63             maxcols=1
  64             shift
  65             continue
  66         ;;
  67     esac
  68 
  69     break
  70 done
  71 
  72 # show all non-existing files given
  73 failed=0
  74 for arg in "$@"; do
  75     if [ "${arg}" = "-" ]; then
  76         continue
  77     fi
  78     if [ ! -e "${arg}" ]; then
  79         printf "no file named \"%s\"\n" "${arg}" > /dev/stderr
  80         failed=1
  81     fi
  82 done
  83 
  84 if [ "${failed}" -gt 0 ]; then
  85     exit 2
  86 fi
  87 
  88 awk -v maxcols="${maxcols}" '
  89     BEGIN {
  90         if (SUBSEP == "") SUBSEP = "\034"
  91 
  92         # normal positive-style is green, colorblind-friendly positive-style
  93         # becomes the same blue as the zero-style
  94         cb = ENVIRON["COLORBLIND"] != 0 || ENVIRON["COLOR_BLIND"] != 0
  95 
  96         pdtile = cb ? "\x1b[38;2;0;95;215m■" : "\x1b[38;2;0;155;95m■"
  97         pitile = cb ? "\x1b[38;2;0;75;235m■" : "\x1b[38;2;0;135;0m■"
  98         pdrgb = cb ? "0;95;215" : "0;135;95"
  99         pirgb = cb ? "0;75;235" : "0;155;0"
 100     }
 101 
 102     # always ignore trailing carriage-returns
 103     { gsub(/\r$/, "") }
 104 
 105     # first non-empty line auto-detects SSV vs. TSV, and the column-count
 106     ncols == 0 { ncols = NF; if (/\t/) { FS = "\t"; $0 = $0 } }
 107 
 108     ncols > 0 {
 109         if (maxcols && width < NF) width = NF;
 110         nitems[++nrows] = NF
 111 
 112         for (i = 1; i <= NF; i++) {
 113             data[nrows SUBSEP i] = $i
 114 
 115             plain = $i
 116             gsub(/\x1b\[[0-9;]*[A-Za-z]/, "", plain)
 117             w = length(plain)
 118             if (widths[i] < w) widths[i] = w
 119 
 120             # handle non-numbers
 121             if (!match(plain, /^[+-]?[0-9]+(\.[0-9]+)?$/)) continue
 122 
 123             numbers[i]++
 124             sums[i] += plain
 125 
 126             # count `dot-decimals` trail in the number
 127             if (!match(plain, /\./)) continue
 128 
 129             dd = w - (RSTART - 1)
 130             if (dot_decs[i] < dd) dot_decs[i] = dd
 131         }
 132     }
 133 
 134     END {
 135         # fix column-widths using number-padding info and the column-totals
 136         for (i = 1; i <= ncols; i++) {
 137             w = 1
 138             if (numbers[i] > 0) {
 139                 decs = dot_decs[i] > 0 ? dot_decs[i] - 1 : 0
 140                 w = length(sprintf("%.*f", decs, sums[i]))
 141             }
 142             if (widths[i] < w) widths[i] = w
 143         }
 144 
 145         if (nrows == 0 || ncols == 0) exit
 146 
 147         # add fake-row with all the column-sums
 148         nrows++
 149         for (i = 1; i <= ncols; i++) {
 150             data[nrows SUBSEP i] = "-"
 151             if (numbers[i] > 0) {
 152                 decs = dot_decs[i] > 0 ? dot_decs[i] - 1 : 0
 153                 data[nrows SUBSEP i] = sprintf("%.*f", decs, sums[i])
 154             }
 155         }
 156 
 157         for (i = 1; i <= nrows; i++) {
 158             n = nitems[i]
 159 
 160             # show tiles, except for the last fake-row with the sums
 161             for (j = 1; i < nrows && j <= n; j++) {
 162                 v = data[i SUBSEP j]
 163 
 164                 if (v == "") {
 165                     printf "\x1b[0m○"
 166                     continue
 167                 }
 168 
 169                 if (!match(v, /^[+-]?[0-9]+(\.[0-9]+)?$/)) {
 170                     if (v ~ /^ | $/) printf "\x1b[38;2;196;160;0m■"
 171                     else printf "\x1b[38;2;128;128;128m■"
 172                     continue
 173                 }
 174 
 175                 if (v > 0) {
 176                     if (match(v, /\./)) printf pdtile
 177                     else printf pitile
 178                     continue
 179                 }
 180 
 181                 if (v < 0) {
 182                     if (match(v, /\./)) printf "\x1b[38;2;215;95;95m■"
 183                     else printf "\x1b[38;2;204;0;0m■"
 184                     continue
 185                 }
 186 
 187                 printf "\x1b[38;2;0;95;215m■"
 188             }
 189 
 190             # show tiles for missing trailing fields, except for the fake-row
 191             if (i < nrows) {
 192                 extra = ncols - nitems[i]
 193                 if (extra > 0) printf "\x1b[0m"
 194                 for (j = 1; j <= extra; j++) printf "×"
 195                 printf "\x1b[0m  "
 196             } else printf "%*s", ncols + 2, ""
 197 
 198             due = 0
 199 
 200             # show/realign row fields
 201             for (j = 1; j <= ncols; j++) {
 202                 v = data[i SUBSEP j]
 203 
 204                 # put 2-space gaps between columns
 205                 if (1 < j) due += 2
 206 
 207                 if (v ~ /^ *$/) {
 208                     due += widths[j]
 209                     continue
 210                 }
 211 
 212                 plain = v
 213                 gsub(/\x1b\[[0-9;]*[A-Za-z]/, "", plain)
 214                 w = length(plain)
 215 
 216                 # handle non-numbers
 217                 if (!match(plain, /^[+-]?[0-9]+(\.[0-9]+)?$/)) {
 218                     printf "%*s%s", due, "", v
 219                     due = widths[j] - w
 220                     continue
 221                 }
 222 
 223                 # count `dot-decimals` trail in the number
 224                 dd = match(plain, /\./) ? w - (RSTART - 1) : 0
 225 
 226                 rpad = dot_decs[j] - dd
 227                 lpad = widths[j] - (w + rpad) + due
 228 
 229                 if (plain > 0) rgb = dot_decs[j] ? pdrgb : pirgb
 230                 else if (plain < 0) rgb = dot_decs[j] ? "215;95;95" : "204;0;0"
 231                 else rgb = "0;95;215"
 232 
 233                 printf "%*s\x1b[38;2;%sm%s\x1b[0m", lpad, "", rgb, v
 234                 due = rpad
 235             }
 236 
 237             # treat extra fields as part of the last one
 238             last = nitems[i]
 239             for (j = ncols + 1; j <= last; j++) printf " %s", data[i SUBSEP j]
 240 
 241             print ""
 242         }
 243     }
 244 ' "$@" | sed -E \
 245     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m\5\x1b[38;2;168;168;168m\6\x1b[0m\7-g' \
 246     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m\5\x1b[38;2;168;168;168m\6\x1b[0m-g' \
 247     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m\5-g' \
 248     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3\x1b[38;2;168;168;168m\4\x1b[0m-g' \
 249     -e 's-([0-9]{1,3})([0-9]{3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m\3-g' \
 250     -e 's-([0-9]{1,3})([0-9]{3})\x1b\[0m-\1\x1b[38;2;168;168;168m\2\x1b[0m-g'