#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# links [options...]
#
# This script finds all web (hyper)links in the input(s) given, specifically
# HTTP/HTTPS links, showing each match on its own output line. It can match
# multiple links on each input line.


from io import TextIOWrapper
from re import compile, Pattern
from sys import argv, exit, stderr, stdin, stdout
from urllib.request import urlopen


# info is the message shown when the leading argument is one of the standard
# cmd-line help options
info = '''
links [options...]

This script finds all web (hyper)links in the input(s) given, specifically
HTTP/HTTPS links, showing each match on its own output line. It can match
multiple links on each input line.
'''.strip()

# help_options has all the leading arguments which ask for the help message;
# candidates are lower-cased before being compared against these
help_options = ('-h', '--h', '-help', '--help')

# url_prefixes has the leading strings which make an input name be loaded
# via urlopen, instead of being opened as a local file
url_prefixes = ('https://', 'http://', 'file://', 'ftp://', 'data:')

# links_src is the regular expression used to find HTTP/HTTPS links: a host
# part followed by any number of slash-started path/query pieces
links_src = 'https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*'
links = compile(links_src)


def fail(msg, code: int = 1) -> None:
    '''Show the error message given, and quit the app right away.'''
    # the ANSI codes style the message in red
    print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
    exit(code)


def seems_url(s: str) -> bool:
    '''Check if the string given starts with one of the URI prefixes.'''
    return s.startswith(url_prefixes)


def handle_lines(w, src, links: Pattern) -> None:
    '''
    Write every match of the pattern given to the writer given, each on its
    own line; the source given is anything which loops over text lines.
    '''
    for line in src:
        for match in links.finditer(line):
            w.write(match.group(0))
            w.write('\n')


def main(args=None) -> None:
    '''
    Run the script using the cmd-line arguments given, where the first item
    is the script's name: when no arguments are given, sys.argv is used.
    '''

    args = argv if args is None else args

    # a leading help-option arg means show the help message and quit
    if len(args) > 1 and args[1].lower() in help_options:
        print(info, file=stderr)
        exit(0)

    try:
        if args.count('-') > 1:
            msg = 'reading from `-` (standard input) ' + \
                'more than once not allowed'
            raise ValueError(msg)

        stdout.reconfigure(newline='\n', encoding='utf-8')

        # handle all named inputs given
        for path in args[1:]:
            if path == '-':
                handle_lines(stdout, stdin, links)
                continue

            if seems_url(path):
                with urlopen(path) as inp:
                    with TextIOWrapper(inp, encoding='utf-8') as txt:
                        handle_lines(stdout, txt, links)
                continue

            with open(path, encoding='utf-8') as inp:
                handle_lines(stdout, inp, links)

        # when no filenames are given, handle lines from stdin
        if len(args) == 1:
            handle_lines(stdout, stdin, links)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.flush()
        stderr.close()
    except KeyboardInterrupt:
        # quit quietly, instead of showing a confusing error message
        stderr.flush()
        stderr.close()
        exit(2)
    except Exception as e:
        fail(e, 1)


# only run when used as a script, so importing this file has no side effects
if __name__ == '__main__':
    main()