#!/bin/sh # The MIT License (MIT) # # Copyright © 2020-2025 pacman64 # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the “Software”), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # dog [options...] [paths/URIs...] # # # Dog fetches data from the named sources given to it, whether these are # filenames or URIs. Single dashes stand for standard input, and can't be # used more than once. When no names are given, stdin is read by default. # # A line-mode is available via leading option `-l`, or its aliases `--l`, # `-lines`, and `--lines`. This mode turns all CRLF byte-pairs into single # LF bytes, and ensures all non-empty inputs end with a final LF byte, # which avoids accidentally joining lines across different inputs. # # Line-mode also ignores leading UTF-8 BOMs on each input's first line. 
# fix_lines copies its input to stdout one line at a time, turning CRLF
# line-endings into single LF bytes, dropping a leading UTF-8 BOM from the
# first line of each input, and ending every line it emits with an LF
#
# Arguments: optional file operands, handed to awk as they are; when none
#            are given, awk reads standard input
fix_lines() {
    awk '
        FNR == 1 { gsub(/^\xef\xbb\xbf/, "") }
        { gsub(/\r$/, ""); print; fflush() }
    ' "$@"
}

# fail shows the message given as its 1st arg in red on stderr, then quits
# the script using the exit code given as its 2nd arg, or 1 by default
fail() {
    # octal \033 is used instead of \e, which POSIX printf doesn't define
    printf '\033[31m%s\033[0m\n' "$1" >&2
    exit "${2:-1}"
}

# decode_data_uri writes to stdout the decoded payload of the base64
# data-URI given as its 1st arg
decode_data_uri() {
    printf "%s" "$1" | sed -E 's-^data:.{0,50};base64,--' | base64 -d
}

# when no args are given, read standard input and quit
# NOTE(review): this path fixes lines even without a line-mode option; it's
# kept as it was, but confirm that's intended
if [ $# -eq 0 ]; then
    fix_lines
    exit $?
fi

# handle leading options
lines=0
case "$1" in
    -h|--h|-help|--help)
        # the help message is the comment block at the top of this script
        awk '/^# +dog /, /^$/ { gsub(/^# ?/, ""); print }' "$0"
        exit 0
        ;;

    -l|--l|-lines|--lines)
        # enable line-mode
        lines=1
        # don't confuse this option as a named input, later on
        shift
        ;;
esac

# a line-mode option followed by no names still means standard input
if [ $# -eq 0 ]; then
    set -- -
fi

# ensure single dashes aren't used multiple times, before emitting anything
dashes=0
for a in "$@"; do
    if [ "$a" = "-" ]; then
        if [ "$dashes" -gt 0 ]; then
            fail "can't use stdin (single-dash) more than once" 1
        fi
        dashes=1
    fi
done

# ensure errors propagate correctly, even when in line-mode, where the last
# step of each pipe would otherwise hide failures from the steps before it;
# not all shells have this option, so it's only enabled when available
if (set -o pipefail) 2> /dev/null; then
    set -o pipefail
fi

for a in "$@"; do
    case "$a" in
        -)
            # handle a dash by reading from stdin
            if [ "$lines" -eq 1 ]; then
                fix_lines || exit $?
            else
                cat || exit $?
            fi
            ;;

        dict://*|file://*|ftp://*|ftps://*|gopher://*|gophers://*|\
        http://*|https://*|rtmp://*|rtsp://*|scp://*|sftp://*|\
        smb://*|smbs://*|telnet://*|tftp://*)
            # handle URIs
            if [ "$lines" -eq 1 ]; then
                { curl -s -L "$a" | fix_lines; } ||
                    fail "failed to fetch URI $a" $?
            else
                curl -s -L "$a" || fail "failed to fetch URI $a" $?
            fi
            ;;

        data:*)
            # handle data-URIs
            if [ "$lines" -eq 1 ]; then
                { decode_data_uri "$a" | fix_lines; } ||
                    fail "failed to decode data-URI $a" $?
            else
                decode_data_uri "$a" ||
                    fail "failed to decode data-URI $a" $?
            fi
            ;;

        *)
            # handle everything else as a filename
            if [ "$lines" -eq 1 ]; then
                # redirecting, instead of passing the name along, keeps awk
                # from treating names like `a=b.txt` as variable assignments
                fix_lines < "$a" || fail "failed to open file $a" $?
            else
                # the double-dash keeps names starting with a dash from
                # being mistaken for options
                cat -- "$a" || fail "failed to open file $a" $?
            fi
            ;;
    esac
done