File: wctabs.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # wctabs [options...] [filepaths...]
  27 #
  28 # Run `wc` (word-count) with the arguments given, turning its output into
  29 # lines of tab-separated values: this removes output-parsing ambiguities,
  30 # in case of filepaths with spaces in them.
  31 #
  32 # You can run `wc --help` to see all its options, which you can also use
  33 # with this script.
  34 
  35 
  36 case "$1" in
  37     -h|--h|-help|--help)
  38         awk '/^# +wctabs /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  39         exit 0
  40     ;;
  41 esac
  42 
  43 # avoid testing all permutations of up to 5 single-char options
  44 stats=$(awk '
  45     BEGIN {
  46         for (i = 1; i < ARGC; i++) {
  47             s = ARGV[i]
  48             delete ARGV[i]
  49 
  50             if (s == "--") done = 1
  51             if (done) continue
  52 
  53             if (s == "--bytes") bytes = 1
  54             if (s == "--chars") chars = 1
  55             if (s == "--lines") lines = 1
  56             if (s == "--words") words = 1
  57             if (s == "--max-line-length") maxll = 1
  58             if (s ~ /^--/) continue
  59 
  60             if (s !~ /^-/) continue
  61             if (s ~ /c/) bytes = 1
  62             if (s ~ /m/) chars = 1
  63             if (s ~ /l/) lines = 1
  64             if (s ~ /w/) words = 1
  65             if (s ~ /L/) maxll = 1
  66         }
  67 
  68         stats = bytes + chars + words + lines + maxll + 0
  69 
  70         # `wc` defaults to 3 stats when not explicitly given any option
  71         print (stats == 0) ? 3 : stats
  72 
  73         exit
  74     }
  75 ' "$@")
  76 
  77 cmd='sed -E'
  78 if [ -p /dev/stdout ] || [ -t 1 ]; then
  79     cmd='sed -E -u'
  80 fi
  81 
  82 wc "$@" | case "${stats}" in
  83     0) ${cmd} 's-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1';;
  84     1) ${cmd} 's-^ +--; s- +-\t-1';;
  85     2) ${cmd} 's-^ +--; s- +-\t-1; s- +-\t-1';;
  86     3) ${cmd} 's-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1';;
  87     4) ${cmd} 's-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1';;
  88     5) ${cmd} 's-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1; s- +-\t-1';;
  89     *) ${cmd} 's-^ +--; s- +-\t-1; s- +-\t-1; s- +-\t-1';;
  90 esac