File: wctabs.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # wctabs [options...] [filepaths...]
  27 #
  28 # Run `wc` (word-count) with the arguments given, turning its output into
  29 # lines of tab-separated values: this removes output-parsing ambiguities,
  30 # in case of filepaths with spaces in them.
  31 #
  32 # You can run `wc --help` to see all its options, which you can also use
  33 # with this script.
  34 
  35 
  36 # handle help option(s)
  37 case "$1" in
  38     -h|--h|-help|--help)
  39         awk '/^# +wctabs/, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  40         exit 0
  41     ;;
  42 esac
  43 
  44 # avoid testing all permutations of up to 5 single-char options
  45 stats=$(awk 'BEGIN {
  46     bytes = 0
  47     chars = 0
  48     words = 0
  49     lines = 0
  50     maxll = 0
  51 
  52     for (i = 1; i < ARGC; i++) {
  53         s = ARGV[i]
  54         delete ARGV[i]
  55 
  56         if (s !~ /^-/) continue
  57 
  58         switch (s) {
  59         case "--bytes":
  60             bytes = 1
  61             break
  62         case "--chars":
  63             chars = 1
  64             break
  65         case "--lines":
  66             lines = 1
  67             break
  68         case "--words":
  69             words = 1
  70             break
  71         case "--max-line-length":
  72             maxll = 1
  73             break
  74         default:
  75             if (s ~ /c/) bytes = 1
  76             else if (s ~ /m/) chars = 1
  77             else if (s ~ /w/) words = 1
  78             else if (s ~ /l/) lines = 1
  79             else if (s ~ /L/) maxll = 1
  80             else {
  81                 stderr = "/dev/stderr"
  82                 printf "\x1b[31munsupported wc option %s\x1b[0m\n", s > stderr
  83             }
  84             break
  85         }
  86     }
  87 
  88     stats = bytes + chars + words + lines + maxll
  89     # `wc` defaults to 3 stats when not explicitly given any option
  90     print (stats == 0) ? 3 : stats
  91 }' "$@")
  92 
  93 errmsg="only up to 5 wc options are supported"
  94 longcmd="s-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1"
  95 
  96 case "${stats}" in
  97     1) wc "$@" | sed -E 's-^ +--; s- +-\t-1';;
  98     2) wc "$@" | sed -E 's-^ +--; s- +-\t-1; s- +-\t-1';;
  99     3) wc "$@" | sed -E 's-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1';;
 100     4) wc "$@" | sed -E 's-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1';;
 101     5) wc "$@" | sed -E "${longcmd}";;
 102     *)
 103         printf "\e[31m%s: %s\e[0m\n" "$0" "${errmsg}" > /dev/stderr
 104         exit 1
 105     ;;
 106 esac