File: dedup.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright (c) 2026 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the "Software"), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
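# dedup [options...] [files...]
#
# Deduplicate lines, ensuring each unique input line appears only once on the
# output. Unlike the standard cmd-line app `uniq`, which only works correctly
# when given sorted lines, this tool works correctly on unsorted lines, and
# keeps the first occurrence of each line in its original order.
#
# Options
#
#     -b, --buffered    never flush the output after each line; by default,
#                       lines are flushed right away when the output is a
#                       terminal or a pipe
#     -h, --help        show this help message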
  31 
  32 
  33 buffered=0
  34 
  35 while [ $# -gt 0 ]; do
  36     case "$1" in
  37         -b|--b|-buffered|--buffered)
  38             buffered=1
  39             shift
  40             continue
  41         ;;
  42 
  43         -h|--h|-help|--help)
  44             awk '/^# +dedup /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  45             exit 0
  46         ;;
  47     esac
  48 
  49     break
  50 done
  51 
  52 [ "$1" = '--' ] && shift
  53 
  54 # show all non-existing files given
  55 failed=0
  56 for arg in "$@"; do
  57     if [ "${arg}" = "-" ]; then
  58         continue
  59     fi
  60     if [ ! -e "${arg}" ]; then
  61         printf "no file named \"%s\"\n" "${arg}" >&2
  62         failed=1
  63     fi
  64 done
  65 
  66 if [ "${failed}" -gt 0 ]; then
  67     exit 2
  68 fi
  69 
  70 flush=0
  71 if [ "${buffered}" -eq 0 ] && { [ -p /dev/stdout ] || [ -t 1 ]; }; then
  72     flush=1
  73 fi
  74 
  75 awk -v flush="${flush}" '
  76     BEGIN { for (i = 1; i < ARGC; i++) if (files[ARGV[i]]++) delete ARGV[i] }
  77     !c[$0]++
  78     flush { fflush() }
  79 ' "$@"