#!/usr/bin/python

# The MIT License (MIT)
#
# Copyright © 2026 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


import os
from io import SEEK_CUR
from re import compile, Match, Pattern, IGNORECASE
from sys import argv, exit, maxsize, stderr, stdin, stdout
from typing import List, Optional


info = '''
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is a superset of
the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h, -help    show this help message
    -i, -ins     match regexes case-insensitively
    -l, -links   add a case-insensitive regex to match HTTP/HTTPS links
'''

# ansi_re matches ANSI-style sequences, so they're only matched `around`
ansi_re = compile('\x1b\\[[0-9;]*[A-Za-z]')


def match(
    src: str, start: int, stop: int, regexes: List[Pattern]
) -> Optional[Match]:
    """Find the earliest-starting non-empty match in src[start:stop].

    Each regex contributes its first non-empty match inside the range; among
    those, the one starting first wins, and on ties the regex given first is
    kept. Returns None when no regex has a non-empty match in the range.
    """

    first = None
    for expr in regexes:
        # empty matches are skipped, since styling them would emit nothing
        # and never advance along the line; finditer is used instead of a
        # single search, so that a regex which can match the empty string
        # (such as `a*`) still finds its later non-empty matches
        m = next(
            (m for m in expr.finditer(src, start, stop) if m.end() > m.start()),
            None,
        )
        if m is None:
            continue
        if first is None or m.start() < first.start():
            first = m
    return first


def style_line(w, s: str, regexes: List[Pattern], ansi_re: Pattern) -> None:
    """Write line s to w, with all regex matches ANSI-styled.

    Existing ANSI-style sequences (as matched by ansi_re) are copied as they
    are, and regexes are only searched for in the text between them. A
    trailing line-feed is always written after the line.
    """

    # start is used outside the regex-match loop to handle trailing parts
    # in lines
    start = 0

    # replace all regex-matches on the line by surrounding each matched
    # substring with ANSI styles/resets
    while True:
        m = ansi_re.search(s, start)
        if not m:
            # no more ANSI sequences: style everything left in the line
            start = style_chunk(w, s, start, maxsize, regexes)
            break

        # style the plain text leading up to the ANSI sequence
        stop = m.start()
        start = style_chunk(w, s, start, stop, regexes)
        # emit any unmatched text left before the ANSI sequence, along with
        # the sequence itself, verbatim
        stop = m.end()
        w.write(s[start:stop])
        start = stop

    # don't forget the last part of the line, or the whole line
    w.write(s[start:])
    w.write('\n')


def style_chunk(w, s: str, start: int, stop: int, pats: List[Pattern]) -> int:
    """Write s[start:stop] to w up to its last match, styling all matches.

    Returns the index right after the last match written, or the original
    start when nothing matched: the caller is expected to write everything
    from the index returned onward.
    """

    while True:
        m = match(s, start, stop, pats)
        if not m:
            return start

        i = m.start()
        j = m.end()

        # part before match
        w.write(s[start:i])

        # current match
        w.write('\x1b[7m')
        w.write(s[i:j])
        w.write('\x1b[0m')

        # the end of the match is the start of the `rest` of the string
        start = j


def main() -> None:
    """Handle the command-line arguments and style all lines from stdin."""

    if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    # output is considered `live` when stdout can't seek: in that case it's
    # flushed after each line
    try:
        stdout.seek(0, SEEK_CUR)
        live = False
    except Exception:
        live = True

    flags = 0
    args = argv[1:]
    find_links = False

    # handle leading options: the first argument which isn't an option, or
    # an explicit `--`, starts the regexes
    while args:
        if args[0] in ('-i', '--i', '-ins', '--ins'):
            args = args[1:]
            flags = IGNORECASE
            continue
        if args[0] in ('-l', '--l', '-links', '--links'):
            args = args[1:]
            find_links = True
            continue
        if args[0] == '--':
            args = args[1:]
            break
        break

    try:
        regexes = [compile(s, flags=flags) for s in args]
        if find_links:
            links = 'https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*'
            regexes.append(compile(links, flags=IGNORECASE))

        for line in stdin:
            # rstrip removes any trailing run of the characters given, which
            # covers both LF and CRLF line-endings
            line = line.rstrip('\r\n')
            style_line(stdout, line, regexes, ansi_re)
            if live:
                stdout.flush()
    except BrokenPipeError:
        # whatever was reading the output has quit (e.g. `| head`): point
        # stdout at the null device, so the flush the interpreter does on
        # exit can't fail again, then quit without an error message
        os.dup2(os.open(os.devnull, os.O_WRONLY), stdout.fileno())
        exit(1)
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        print(str(e), file=stderr)
        exit(1)


if __name__ == '__main__':
    main()