File: lines.sh
   1 #!/bin/sh
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # lines [files/URIs...]
  27 #
  28 # Fetch data from the named sources given to it, whether these are
# filenames or URIs. A single dash stands for standard input, and can be
# used at most once. When no names are given, stdin is read by default.
  31 #
  32 # This script turns all CRLF byte-pairs into single LF bytes, and ensures
  33 # all non-empty inputs end with a final LF byte, which avoids accidentally
  34 # joining lines across different inputs.
  35 #
  36 # It also ignores leading UTF-8 BOMs on each input's first line.
  37 
  38 
  39 # fix_lines ensures CRLF byte-pairs become single LF bytes, and that
  40 # leading UTF-8 BOMs on the first lines of each input are ignored
  41 fix_lines() {
  42     awk 'FNR == 1 { gsub(/^\xef\xbb\xbf/, "") } 1' | sed -E 's-\r$--'
  43 }
  44 
  45 # fail quits the script right after showing the message given, using
  46 # exit code given as its 2nd arg
  47 fail() {
  48     printf "\x1b[31m%s\x1b[0m\n" "$1" > /dev/stderr
  49     exit "$2"
  50 }
  51 
  52 # when no args are given, just read from stdin and quit
  53 if [ $# -eq 0 ]; then
  54     fix_lines
  55     exit $?
  56 fi
  57 
  58 # handle leading options
  59 case "$1" in
  60     -h|--h|-help|--help)
  61         # show help message, extracting the info-comment at the start
  62         # of this file, and quit
  63         awk '/^# +lines/, /^$/ { gsub(/^# ?/, ""); print }' "$0"
  64         exit 0
  65     ;;
  66 esac
  67 
  68 # ensure single dashes aren't used multiple times
  69 dashes=0
  70 for a in "$@"; do
  71     if [ "$a" = "-" ]; then
  72         if [ "$dashes" -gt 0 ]; then
  73             fail "can't use standard input (single-dash) more than once" 1
  74         fi
  75         dashes=1
  76     fi
  77 done
  78 
  79 # ensure errors propagate correctly thru pipes
  80 # set -o pipefail
  81 
  82 for a in "$@"; do
  83     case "$a" in
  84         -)
  85             # handle a dash by reading from stdin
  86             fix_lines || exit $?
  87         ;;
  88 
  89         dict://*|file://*|ftp://*|ftps://*|gopher://*|gophers://*|\
  90         http://*|https://*|rtmp://*|rtsp://*|scp://*|sftp://*|\
  91         smb://*|smbs://*|telnet://*|tftp://*)
  92             # handle URIs
  93             (curl -s -L "$a" | fix_lines) || fail "failed to fetch URI $a" $?
  94         ;;
  95 
  96         data:*)
  97             # handle data-URIs
  98             (printf "%s" "$a" | sed 's-^.*,--1' | base64 -d | fix_lines) || \
  99                 fail "failed to decode data-URI $a" $?
 100         ;;
 101 
 102         *)
 103             # handle files
 104             (cat "$a" | fix_lines) || fail "failed to open file $a" $?
 105         ;;
 106     esac
 107 done