File: dog.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from shutil import copyfileobj
  27 from sys import argv, exit, stderr, stdin, stdout
  28 from typing import List
  29 
  30 
  31 info = '''
  32 dog [options...] [filepaths/URIs...]
  33 
  34 This script reads/fetches all named sources given to it, which can be a mix
  35 of filepaths and URIs. Its name suggests that, unlike `cat`, it can also
  36 fetch URIs.
  37 
  38 Any of `-l`, `--l`, `-lines`, or `--lines` enables line-mode, which ensures
  39 each non-empty input ends with a line feed, even when the original data do
  40 not; this mode also ignores leading UTF-8 BOM on first lines, and turns all
  41 CRLF byte-pairs into simple line-feeds. In other words, line-mode ensures
  42 unix-style output lines.
  43 
  44 The help option is `-h`, `--h`, `-help`, or `--help`.
  45 '''
  46 
  47 # a leading help-option arg means show the help message and quit
  48 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  49     print(info.strip(), file=stderr)
  50     exit(0)
  51 
  52 
  53 def seems_url(s: str) -> bool:
  54     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
  55     return any(s.startswith(p) for p in protocols)
  56 
  57 
  58 def bytes_mode(args: List[str]) -> None:
  59     dashes = 0
  60     data = bytes()
  61     reuse_stdin = args.count('-') > 1
  62 
  63     if any(seems_url(e) for e in args):
  64         from urllib.request import urlopen
  65 
  66     for path in args:
  67         if path == '-':
  68             dashes += 1
  69             if reuse_stdin and dashes == 1:
  70                 data = stdin.buffer.read()
  71             if reuse_stdin:
  72                 stdout.buffer.write(data)
  73                 stdout.flush()
  74                 continue
  75             copyfileobj(stdin.buffer, stdout.buffer)
  76             stdout.flush()
  77             continue
  78 
  79         if seems_url(path):
  80             with urlopen(path) as inp:
  81                 copyfileobj(inp, stdout.buffer)
  82             stdout.flush()
  83             continue
  84 
  85         with open(path, mode='rb') as inp:
  86             copyfileobj(inp, stdout.buffer)
  87         stdout.flush()
  88 
  89     if len(args) == 0:
  90         copyfileobj(stdin.buffer, stdout.buffer)
  91         stdout.flush()
  92 
  93 
  94 def lines_mode(args: List[str]) -> None:
  95     dashes = 0
  96     lines = []
  97     reuse_stdin = args.count('-') > 1
  98 
  99     if any(seems_url(e) for e in args):
 100         from urllib.request import urlopen
 101 
 102     for path in args:
 103         if path == '-':
 104             dashes += 1
 105             if reuse_stdin and dashes == 1:
 106                 lines = load_lines(stdin, '\xef\xbb\xbf', '\r\n', '\n')
 107             if reuse_stdin:
 108                 for line in lines:
 109                     stdout.write(line)
 110                     stdout.write('\n')
 111                 stdout.flush()
 112                 continue
 113             emit_lines(stdout, stdin, '\xef\xbb\xbf', '\r\n', '\n')
 114             stdout.flush()
 115             continue
 116 
 117         if seems_url(path):
 118             with urlopen(path) as inp:
 119                 w = stdout.buffer
 120                 emit_lines(w, inp, b'\xef\xbb\xbf', b'\r\n', b'\n')
 121             stdout.flush()
 122             continue
 123 
 124         with open(path, encoding='utf-8') as inp:
 125             emit_lines(stdout, inp, '\xef\xbb\xbf', '\r\n', '\n')
 126         stdout.flush()
 127 
 128     if len(args) == 0:
 129         emit_lines(stdout, stdin, '\xef\xbb\xbf', '\r\n', '\n')
 130         stdout.flush()
 131 
 132 
 133 def load_lines(src, utf8bom, crlf, lf) -> List[str]:
 134     lines = []
 135     first = True
 136     for line in src:
 137         line = line.lstrip(utf8bom) if first else line
 138         # ignore trailing carriage-returns and line-feeds in lines
 139         lines.append(line.rstrip(crlf).rstrip(lf))
 140         first = False
 141     return lines
 142 
 143 
 144 def emit_lines(w, src, utf8bom, crlf, lf) -> None:
 145     first = True
 146     for line in src:
 147         line = line.lstrip(utf8bom) if first else line
 148         # ignore trailing carriage-returns and line-feeds in lines
 149         w.write(line.rstrip(crlf).rstrip(lf))
 150         w.write(lf)
 151         first = False
 152 
 153 
 154 try:
 155     if len(argv) > 1 and argv[1] in ('-l', '--l', '-lines', '--lines'):
 156         lines_mode(argv[2:])
 157     else:
 158         bytes_mode(argv[1:])
 159 except BrokenPipeError:
 160     # quit quietly, instead of showing a confusing error message
 161     stderr.close()
 162 except KeyboardInterrupt:
 163     exit(2)
 164 except Exception as e:
 165     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 166     exit(1)