#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2025 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from io import SEEK_CUR
from re import compile, Match, Pattern, IGNORECASE
from sys import argv, exit, maxsize, stderr, stdin, stdout
from typing import List, Optional


info = '''
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is a superset of
the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h, -help    show this help message
    -i, -ins     match regexes case-insensitively
    -l, -links   add a case-insensitive regex to match HTTP/HTTPS links
'''

# ansi_re matches ANSI-style sequences, so they're only matched `around`
ansi_re = compile('\x1b\\[[0-9;]*[A-Za-z]')


def _first_nonempty(expr: Pattern, src: str, start: int, stop: int) \
        -> Optional[Match]:
    '''Return the first non-empty match of expr in src[start:stop], if any.'''

    # zero-width matches are skipped: styling an empty string is pointless,
    # and accepting one would keep the caller from ever advancing
    for m in expr.finditer(src, start, stop):
        if m.end() > m.start():
            return m
    return None


def match(src: str, start: int, stop: int, regexes: List[Pattern]) \
        -> Optional[Match]:
    '''
    Find the earliest-starting non-empty match among all regexes given.

    Only the part of src between indices start and stop is searched. When
    several regexes match starting at the same index, the one given first
    wins. The result is None when nothing matches.
    '''

    first = None
    for expr in regexes:
        m = _first_nonempty(expr, src, start, stop)
        if m is None:
            continue
        if first is None or m.start() < first.start():
            first = m
    return first


def style_line(w, s: str, regexes: List[Pattern], ansi_re: Pattern) -> None:
    '''
    Write line s to w, reverse-styling all regex matches in it.

    Parameter w is anything with a `write` method accepting strings, s is
    the line without its line-terminator, and ansi_re is the regex used to
    find ANSI-style sequences, which are always emitted verbatim and never
    searched. A line-feed is always written after the line.
    '''

    # start is used outside the loop to handle trailing parts in lines
    start = 0

    # style matches only in the chunks between ANSI-style sequences
    while True:
        m = ansi_re.search(s, start)
        if not m:
            # no more ANSI sequences: handle all the rest of the line
            start = style_chunk(w, s, start, maxsize, regexes)
            break

        # handle the chunk right before the ANSI sequence found
        start = style_chunk(w, s, start, m.start(), regexes)

        # emit what's left of that chunk, along with the ANSI sequence itself
        stop = m.end()
        w.write(s[start:stop])
        start = stop

    # don't forget the last part of the line, or the whole line
    w.write(s[start:])
    w.write('\n')


def style_chunk(w, s: str, start: int, stop: int, pats: List[Pattern]) -> int:
    '''
    Write s[start:stop] to w up to the end of its last match, styling matches.

    The index returned is where the unwritten rest of the chunk starts: it's
    up to the caller to emit anything from there on.
    '''

    while True:
        m = match(s, start, stop, pats)
        if not m:
            return start

        i = m.start()
        j = m.end()

        # part before match
        w.write(s[start:i])

        # current match
        w.write('\x1b[7m')
        w.write(s[i:j])
        w.write('\x1b[0m')

        # the end of the match is the start of the `rest` of the string;
        # matches are never empty, so each iteration always moves forward
        start = j


def main() -> None:
    '''Run the command-line app: handle options, then style all stdin lines.'''

    if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    # output is considered `live` when it can't seek, which is the case for
    # pipes and terminals: live output is flushed after each line
    try:
        stdout.seek(0, SEEK_CUR)
        live = False
    except Exception:
        live = True

    flags = 0
    args = argv[1:]
    find_links = False

    # handle leading options, stopping at the first non-option argument
    while args:
        arg = args[0]
        if arg in ('-i', '--i', '-ins', '--ins'):
            args = args[1:]
            flags = IGNORECASE
            continue
        if arg in ('-l', '--l', '-links', '--links'):
            args = args[1:]
            find_links = True
            continue
        if arg == '--':
            args = args[1:]
        break

    try:
        regexes = [compile(s, flags=flags) for s in args]
        if find_links:
            links = 'https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*'
            regexes.append(compile(links, flags=IGNORECASE))

        for line in stdin:
            # ignore trailing carriage-returns and/or line-feeds in lines
            line = line.rstrip('\r\n')
            style_line(stdout, line, regexes, ansi_re)
            if live:
                stdout.flush()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        print(str(e), file=stderr)
        exit(1)


if __name__ == '__main__':
    main()