File: dog.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # dog [options...] [paths/URIs...]
  27 #
  28 #
  29 # Dog fetches data from the named sources given to it, whether these are
  30 # filenames or URIs. Single dashes stand for standard input, and can't be
  31 # used more than once. When no names are given, stdin is read by default.
  32 #
  33 # A line-mode is available via leading option `-l`, or its aliases `--l`,
  34 # `-lines`, and `--lines`. This mode turns all CRLF byte-pairs into single
  35 # LF bytes, and ensures all non-empty inputs end with a final LF byte,
  36 # which avoids accidentally joining lines across different inputs.
  37 #
  38 # Line-mode also ignores leading UTF-8 BOMs on each input's first line.
  39 
  40 
  41 # fix_lines ensures CRLF byte-pairs become single LF bytes, and that
  42 # leading UTF-8 BOMs on the first lines of each input are ignored
  43 fix_lines() {
  44     awk '
  45         FNR == 1 { gsub(/^\xef\xbb\xbf/, "") }
  46         { gsub(/\r$/, ""); print; fflush() }
  47     ' "$@"
  48 }
  49 
  50 # fail quits the script right after showing the message given, using
  51 # exit code given as its 2nd arg
  52 fail() {
  53     printf "\e[31m%s\e[0m\n" "$1" > /dev/stderr
  54     exit "${2:-1}"
  55 }
  56 
  57 # when no args are given, just show the help message and quit
  58 if [ $# -eq 0 ]; then
  59     fix_lines
  60     exit $?
  61 fi
  62 
  63 # handle leading options
  64 lines=0
  65 case "$1" in
  66     -h|--h|-help|--help)
  67         awk '/^# +dog /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  68         exit 0
  69     ;;
  70     -l|--l|-lines|--lines)
  71         # enable line-mode
  72         lines=1
  73         # don't confuse this option as a named input, later on
  74         shift
  75     ;;
  76 esac
  77 
  78 # ensure single dashes aren't used multiple times
  79 dashes=0
  80 for a in "$@"; do
  81     if [ "$a" = "-" ]; then
  82         if [ "$dashes" -gt 0 ]; then
  83             fail "can't use stdin (single-dash) more than once" 1
  84         fi
  85         dashes=1
  86     fi
  87 done
  88 
  89 # ensure errors propagate correctly, even when in line-mode
  90 # set -o pipefail
  91 
  92 for a in "$@"; do
  93     case "$a" in
  94         -)
  95             # handle a dash by reading from stdin
  96             if [ "$lines" -eq 1 ]; then
  97                 fix_lines || exit $?
  98             else
  99                 cat || exit $?
 100             fi
 101         ;;
 102 
 103         dict://*|file://*|ftp://*|ftps://*|gopher://*|gophers://*|\
 104         http://*|https://*|rtmp://*|rtsp://*|scp://*|sftp://*|\
 105         smb://*|smbs://*|telnet://*|tftp://*)
 106             # handle URIs
 107             if [ "$lines" -eq 1 ]; then
 108                 { curl -s -L "$a" | fix_lines; } || \
 109                     fail "failed to fetch URI $a" $?
 110             else
 111                 curl -s -L "$a" || fail "failed to fetch URI $a" $?
 112             fi
 113         ;;
 114 
 115         data:*)
 116             # handle data-URIs
 117             if [ "$lines" -eq 1 ]; then
 118                 {
 119                     printf "%s" "$a" | sed -E 's-^data:.{0,50};base64,--' |
 120                         base64 -d | fix_lines
 121                 } || fail "failed to decode data-URI $a" $?
 122             else
 123                 {
 124                     printf "%s" "$a" | sed -E 's-^data:.{0,50};base64,--' |
 125                         base64 -d
 126                 } || fail "failed to decode data-URI $a" $?
 127             fi
 128         ;;
 129 
 130         *)
 131             # handle files and bytes-mode data-URIs
 132             if [ "$lines" -eq 1 ]; then
 133                 fix_lines "$a" || fail "failed to open file $a" $?
 134             else
 135                 cat "$a" || fail "failed to open file $a" $?
 136             fi
 137         ;;
 138     esac
 139 done