File: lines.sh 1 #!/bin/sh 2 3 # The MIT License (MIT) 4 # 5 # Copyright © 2024 pacman64 6 # 7 # Permission is hereby granted, free of charge, to any person obtaining a copy 8 # of this software and associated documentation files (the “Software”), to deal 9 # in the Software without restriction, including without limitation the rights 10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 # copies of the Software, and to permit persons to whom the Software is 12 # furnished to do so, subject to the following conditions: 13 # 14 # The above copyright notice and this permission notice shall be included in 15 # all copies or substantial portions of the Software. 16 # 17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 # SOFTWARE. 24 25 26 # lines [files/URIs...] 27 # 28 # Fetch data from the named sources given to it, whether these are 29 # filenames or URIs. Single dashes stand for standard input, and can be 30 # used only once. When no names are given, stdin is read by default. 31 # 32 # This script turns all CRLF byte-pairs into single LF bytes, and ensures 33 # all non-empty inputs end with a final LF byte, which avoids accidentally 34 # joining lines across different inputs. 35 # 36 # It also ignores leading UTF-8 BOMs on each input's first line. 

# fix_lines filters standard input into standard output: it drops a UTF-8
# BOM leading the first line, turns each CRLF line-ending into a single LF
# byte, and ends the last line with a LF byte when it's missing one; all
# of this happens in a single awk process, using only the escapes every awk
# supports, since \r in sed patterns and \x in awk ones aren't portable
fix_lines() {
    # the C locale makes awk match plain bytes, whatever the user's locale
    LC_ALL=C awk '
        FNR == 1 { sub(/^\357\273\277/, "") }
        { sub(/\r$/, ""); print }
    '
}

# fail shows the message given as its 1st arg on standard error, styled
# in red, then quits the script using the exit code given as its 2nd arg,
# which defaults to 1 when missing
fail() {
    # octal escapes are the only numeric ones POSIX printf guarantees
    printf '\033[31m%s\033[0m\n' "$1" >&2
    exit "${2:-1}"
}

# fix_output runs the command given as its args, and feeds its output to
# fix_lines; unlike a plain pipe, which only reports how its last command
# went, this returns the exit code of the command given when that fails,
# and the exit code from fix_lines otherwise
fix_output() {
    {
        # fd 3 carries the command's exit code into the captured text,
        # while fd 4 lets the fixed lines reach the real standard output
        fix_output_code=$(
            { { "$@"; printf '%s' "$?" >&3; } | fix_lines >&4; } 3>&1
        )
        fix_output_fixer=$?
    } 4>&1

    if [ "${fix_output_code:-1}" -ne 0 ]; then
        return "${fix_output_code:-1}"
    fi
    return "$fix_output_fixer"
}

# decode_data_uri emits the bytes encoded in the base64 payload of the
# data-URI given as its 1st arg: the payload is whatever follows the last
# comma; data-URIs which aren't base64-encoded aren't supported
decode_data_uri() {
    printf '%s' "${1##*,}" | base64 -d
}

# when no args are given, just read from stdin and quit
if [ $# -eq 0 ]; then
    fix_lines
    exit $?
fi

# handle leading options
case "$1" in
    -h|--h|-help|--help)
        # show help message, extracting the info-comment at the start
        # of this file, and quit
        awk '/^# +lines/, /^$/ { gsub(/^# ?/, ""); print }' "$0"
        exit 0
    ;;
esac

# ensure single dashes aren't used multiple times, since standard input
# can only be read through once
dashes=0
for a in "$@"; do
    [ "$a" = "-" ] || continue
    if [ "$dashes" -gt 0 ]; then
        fail "can't use standard input (single-dash) more than once" 1
    fi
    dashes=1
done

# emit all inputs in the order given, quitting at the first one which fails
for a in "$@"; do
    case "$a" in
        -)
            # handle a dash by reading from stdin
            fix_lines || exit "$?"
        ;;

        dict://*|file://*|ftp://*|ftps://*|gopher://*|gophers://*|\
        http://*|https://*|rtmp://*|rtsp://*|scp://*|sftp://*|\
        smb://*|smbs://*|telnet://*|tftp://*)
            # handle URIs: option -f makes curl fail on HTTP errors, instead
            # of passing the server's error page along as if it were data
            fix_output curl -f -s -L "$a" || fail "failed to fetch URI $a" "$?"
        ;;

        data:*)
            # handle data-URIs
            fix_output decode_data_uri "$a" || \
                fail "failed to decode data-URI $a" "$?"
        ;;

        *)
            # handle files: the double-dash keeps names starting with a dash
            # from being mistaken for options
            fix_output cat -- "$a" || fail "failed to open file $a" "$?"
        ;;
    esac
done