#!/usr/bin/python

# The MIT License (MIT)
#
# Copyright (c) 2026 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from io import SEEK_CUR
from re import compile, Match, Pattern, IGNORECASE
from sys import argv, exit, maxsize, stderr, stdin, stdout
from typing import List, Optional


info = '''
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is a superset of
the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h, -help     show this help message
    -i, -ins      match regexes case-insensitively
    -l, -links    add a case-insensitive regex to match HTTP/HTTPS links
'''


# ansi_re matches ANSI-style sequences, so the user-given regexes are only
# ever tried on the text `around` those sequences, never on the sequences
# themselves
ansi_re = compile('\x1b\\[[0-9;]*[A-Za-z]')


def match(
    src: str, start: int, stop: int, regexes: List[Pattern]
) -> Optional[Match]:
    """Find the earliest-starting non-empty match in src[start:stop].

    Each regex contributes its first non-empty match in the range given;
    among those, the one starting earliest is returned, with ties going to
    the regex coming first in the list. The result is None when no regex
    has a non-empty match in that range.
    """

    first = None
    for expr in regexes:
        # empty matches are skipped instead of giving up on the regex, since
        # patterns such as `a*` can match nothing at the start of the range
        # and still have a real match further along
        for m in expr.finditer(src, start, stop):
            if first is not None and m.start() >= first.start():
                # matches only start later from here on, so this regex
                # can no longer beat the current candidate
                break
            if m.start() != m.end():
                first = m
                break
    return first


def style_line(w, s: str, regexes: List[Pattern], ansi_re: Pattern) -> None:
    """Write line s to w, reverse-styling all regex matches, then a newline.

    The line is split around the ANSI-style sequences found by ansi_re:
    the text between them is handed to style_chunk, while the sequences
    themselves are written out verbatim, so they're never matched/altered.
    """

    # start is used outside the loop to handle trailing parts in lines
    start = 0

    while True:
        m = ansi_re.search(s, start)
        if not m:
            # no more ANSI sequences: the rest of the line is a single chunk
            start = style_chunk(w, s, start, maxsize, regexes)
            break

        # style the text before the ANSI sequence found
        start = style_chunk(w, s, start, m.start(), regexes)
        # emit what's left of that text, along with the ANSI sequence itself
        stop = m.end()
        w.write(s[start:stop])
        start = stop

    # don't forget the last part of the line, or the whole line
    w.write(s[start:])
    w.write('\n')


def style_chunk(w, s: str, start: int, stop: int, pats: List[Pattern]) -> int:
    """Write the styled matches in s[start:stop] to w, with the text before.

    Each match is surrounded by the ANSI codes for reverse-video and reset.
    The text following the last match in the chunk is NOT written: the
    index where it starts is returned instead, for the caller to handle.
    """

    while True:
        m = match(s, start, stop, pats)
        if not m:
            return start

        i = m.start()
        j = m.end()

        # part before match
        w.write(s[start:i])
        # current match
        w.write('\x1b[7m')
        w.write(s[i:j])
        w.write('\x1b[0m')

        # the end of the match is the start of the `rest` of the string
        start = j


def main() -> None:
    """Run the command-line app: parse options, then style all stdin lines.

    Exits with code 0 after showing the help message, with code 2 when
    interrupted from the keyboard, and with code 1 on any other error,
    after showing its message on the standard error.
    """

    if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    # when the standard output can't seek, lines are flushed as soon as
    # they're written; presumably this is to keep pipes and terminals
    # responsive, since seeking is expected to fail on those
    try:
        stdout.seek(0, SEEK_CUR)
        live = False
    except Exception:
        live = True

    flags = 0
    args = argv[1:]
    find_links = False

    # handle leading options: the first non-option argument ends them, as
    # does an explicit `--`, which is itself dropped from the arguments
    while args:
        if args[0] in ('-i', '--i', '-ins', '--ins'):
            flags = IGNORECASE
        elif args[0] in ('-l', '--l', '-links', '--links'):
            find_links = True
        else:
            if args[0] == '--':
                args = args[1:]
            break
        args = args[1:]

    try:
        regexes = [compile(s, flags=flags) for s in args]
        if find_links:
            links = 'https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*'
            regexes.append(compile(links, flags=IGNORECASE))

        for line in stdin:
            # ignore line-endings, both unix-style and windows-style
            line = line.rstrip('\r\n')
            style_line(stdout, line, regexes, ansi_re)
            if live:
                stdout.flush()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        print(str(e), file=stderr)
        exit(1)


if __name__ == '__main__':
    main()