#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from itertools import islice
from re import compile, Match, Pattern, IGNORECASE
from re import error as RegexError
from sys import argv, exit, maxsize, stderr, stdin, stdout
from typing import List, Optional


info = '''
ihima [regexes...]

Insensitive HIlight MAtches colors all substrings case-insensitively matching
any of the regexes given.
'''

# if len(argv) == 1:
#     print(info.strip(), file=stderr)
#     exit(0)

# ansi_re matches ANSI-style escape sequences: these are copied to the output
# as they are, and user regexes are only searched in the text around them
ansi_re = compile('\x1b\\[[0-9;]*[A-Za-z]')


def _first_nonempty(expr: Pattern, src: str, start: int,
                    stop: int) -> Optional[Match]:
    """Return the first non-empty match of expr in src[start:stop], if any."""
    # finditer steps past zero-width matches by itself, so patterns such as
    # `a*` or `\b` can't keep the search stuck on the same position
    for m in expr.finditer(src, start, stop):
        if m.end() > m.start():
            return m
    return None


def match(src: str, start: int, stop: int,
          regexes: List[Pattern]) -> Optional[Match]:
    """Find the earliest non-empty match among all the regexes given.

    The search is limited to src[start:stop]. When several regexes match
    at the same starting position, the one given first wins. The result
    is None when no regex has a non-empty match in that range.

    Zero-width matches are ignored on purpose: there's nothing to hilight
    in them, and accepting them would keep callers which resume searching
    from the end of each match from ever advancing.
    """
    first = None
    for expr in regexes:
        m = _first_nonempty(expr, src, start, stop)
        if m is None:
            continue
        if first is None or m.start() < first.start():
            first = m
    return first


def style_line(w, s: str, regexes: List[Pattern], ansi_re: Pattern) -> None:
    """Write line s to w, with all regex matches hilighted.

    w is anything with a `write` method taking strings; s is the line
    without its line-terminator, since a line-feed is always written after
    it. Substrings matching ansi_re are emitted unchanged and split the
    line into chunks, each of which is searched/styled separately, so the
    regexes never match inside, or across, those sequences.
    """
    # start is used outside the loop to handle trailing parts in lines
    start = 0

    while True:
        m = ansi_re.search(s, start)
        if m is None:
            # no more ANSI sequences: style all the rest of the line
            start = style_chunk(w, s, start, maxsize, regexes)
            break

        # style the text leading up to the ANSI sequence
        start = style_chunk(w, s, start, m.start(), regexes)
        # emit any unmatched text left before the ANSI sequence, along with
        # the sequence itself, exactly as given
        stop = m.end()
        w.write(s[start:stop])
        start = stop

    # don't forget the last part of the line, or the whole line
    w.write(s[start:])
    w.write('\n')


def style_chunk(w, s: str, start: int, stop: int, pats: List[Pattern]) -> int:
    """Write s[start:stop] to w up to its last match, hilighting matches.

    Each matched substring is surrounded by the ANSI codes for reverse-video
    and style-reset. Text after the last match is not written: the index
    where that leftover text starts is returned instead, so callers can
    emit it themselves. When nothing matches, nothing is written and start
    is returned unchanged.
    """
    while True:
        m = match(s, start, stop, pats)
        if m is None:
            return start

        i = m.start()
        j = m.end()

        # part before match
        w.write(s[start:i])

        # current match
        w.write('\x1b[7m')
        w.write(s[i:j])
        w.write('\x1b[0m')

        # the end of the match is the start of the `rest` of the string
        start = j


def main() -> None:
    """Hilight matches of the regexes from the command line in stdin lines."""
    regexes = []
    for src in islice(argv, 1, None):
        try:
            regexes.append(compile(src, flags=IGNORECASE))
        except RegexError as e:
            # show a short message, instead of a confusing traceback
            print(f'ihima: invalid regex {src!r}: {e}', file=stderr)
            exit(1)

    try:
        for line in stdin:
            # ignore trailing carriage-returns and/or line-feeds in input
            # lines
            line = line.rstrip('\r\n')
            style_line(stdout, line, regexes, ansi_re)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)


if __name__ == '__main__':
    main()