File: links.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # links [options...]
  27 #
  28 # This script finds all web (hyper)links in the input(s) given, specifically
  29 # HTTP/HTTPS links, showing each match on its own output line. It can match
  30 # multiple links on each input line.
  31 
  32 
  33 from io import TextIOWrapper
  34 from re import compile, Pattern
  35 from sys import argv, exit, stderr, stdin, stdout
  36 from urllib.request import urlopen
  37 
  38 
  39 # info is the message shown when the script isn't given any argument, or
  40 # when the leading argument is one of the standard cmd-line help options
  41 info = '''
  42 links [options...]
  43 
  44 This script finds all web (hyper)links in the input(s) given, specifically
  45 HTTP/HTTPS links, showing each match on its own output line. It can match
  46 multiple links on each input line.
  47 '''.strip()
  48 
  49 # a leading help-option arg means show the help message and quit
  50 if len(argv) == 2 and argv[1].lower() in ('-h', '--h', '-help', '--help'):
  51     print(info, file=stderr)
  52     exit(0)
  53 
  54 
  55 def fail(msg, code: int = 1) -> None:
  56     '''Show the error message given, and quit the app right away.'''
  57     print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
  58     exit(code)
  59 
  60 
  61 # no args or a leading help-option arg means show the help message and quit
  62 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  63     print(info, file=stderr)
  64     exit(0)
  65 
  66 
  67 def seems_url(s: str) -> bool:
  68     for prot in ('https://', 'http://', 'file://', 'ftp://', 'data:'):
  69         if s.startswith(prot):
  70             return True
  71     return False
  72 
  73 
  74 def handle_lines(w, src, links: Pattern) -> None:
  75     for line in src:
  76         for match in links.finditer(line):
  77             w.write(line[match.start():match.end()])
  78             w.write('\n')
  79 
  80 
  81 try:
  82     if argv.count('-') > 1:
  83         msg = 'reading from `-` (standard input) more than once not allowed'
  84         raise ValueError(msg)
  85 
  86     links_src = 'https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*'
  87     links = compile(links_src)
  88     stdout.reconfigure(newline='\n', encoding='utf-8')
  89 
  90     # handle all named inputs given
  91     for path in argv[1:]:
  92         if path == '-':
  93             handle_lines(stdout, stdin, links)
  94             continue
  95 
  96         if seems_url(path):
  97             with urlopen(path) as inp:
  98                 with TextIOWrapper(inp, encoding='utf-8') as txt:
  99                     handle_lines(stdout, txt, links)
 100             continue
 101 
 102         with open(path, encoding='utf-8') as inp:
 103             handle_lines(stdout, inp, links)
 104 
 105     # when no filenames are given, handle lines from stdin
 106     if len(argv) == 1:
 107         handle_lines(stdout, stdin, links)
 108 except BrokenPipeError:
 109     # quit quietly, instead of showing a confusing error message
 110     stderr.flush()
 111     stderr.close()
 112 except KeyboardInterrupt:
 113     # quit quietly, instead of showing a confusing error message
 114     stderr.flush()
 115     stderr.close()
 116     exit(2)
 117 except Exception as e:
 118     fail(e, 1)