#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from os import fstat
from sys import argv, exit, stderr, stdin, stdout


info = '''
nh [options...] [filepaths/URIs...]


Nice Hexadecimals is a byte-viewer which shows bytes as base-16 values,
using various ANSI styles to color-code output.

Output lines end with a panel showing all ASCII sequences detected along:
each such panel also includes all ASCII from the next row as well, since
not doing that would make grepping/matching whole strings less reliable,
as some matches may be missed simply due to the narrowness of the panel.

Options, where leading double-dashes are also allowed:

-h show this help message
-help same as -h

-n narrow output, which fits 80-column mode
-narrow same as -n
'''


def style_byte(b: int) -> str:
    '''
    Render a byte value (0..255) as its ANSI-styled, 3-character-wide cell.

    Every cell is 2 lowercase hex digits plus 1 more visible column: bytes
    from 32 up to 126 show their own ASCII symbol in that last column, using
    a second style, while all other bytes show a plain space there.
    '''

    if 32 <= b < 127:
        return f'\x1b[38;5;72m{b:02x}\x1b[38;5;239m{chr(b)}'

    # the 2 extreme byte values get their own styles, to stand out
    if b == 0:
        color = 111
    elif b == 255:
        color = 209
    else:
        color = 246
    return f'\x1b[38;5;{color}m{b:02x} '


# bytes2styled_hex has `pre-rendered` strings for each possible byte, so the
# output loops only need a tuple lookup per byte
bytes2styled_hex = tuple(style_byte(b) for b in range(256))

# int2ascii slightly speeds up func show_ascii
int2ascii = tuple(chr(i) if 32 <= i < 127 else ' ' for i in range(256))

# visible noticeably speeds up func show_ascii; notice how spaces (code 32)
# aren't considered visible symbols, which makes sense in func show_ascii
visible = tuple(32 < i < 127 for i in range(256))

# spaces lets func write_spaces minimize `write` operations, resulting in
# noticeable speed-ups when the script deals with megabytes of data: the
# item at index i is a string of exactly i spaces; longer runs than the
# ones available here are still handled correctly by func write_spaces
spaces = tuple(i * ' ' for i in range(64))


def show_hex(w, src, chunk_size: int = 16) -> None:
    '''
    Handle all input from the source given, emitting styled output.

    w is anything with a `write` method accepting strings; src is anything
    with a `read(size)` method returning bytes-like chunks, where an empty
    chunk means the input is over; chunk_size is how many bytes each output
    row shows.

    Rows are built from whatever each `read` call returns, so this assumes
    src only gives fewer than chunk_size bytes right before its end.
    '''

    # make the ruler/line-breather, which shows up every 5 hex-output lines
    pre = 8 * ' '
    pat = ' ·'
    pat = int(3 * chunk_size / len(pat)) * pat
    sep_line = f'{pre} \x1b[38;5;245m{pat}\x1b[0m\n'

    # offset is the byte offset shown at the start of each display line
    offset = 0

    # lines keeps track of the main output line/row count, to figure out
    # when to put `breather` lines
    lines = 0

    # prev remembers the previous chunk, as showing ASCII content for
    # 2 output-lines worth of bytes requires staying 1 step behind, so
    # to speak; the last row is simply paired with an empty chunk
    prev = src.read(chunk_size)
    while prev:
        chunk = src.read(chunk_size)

        if lines > 0 and lines % 5 == 0:
            w.write(sep_line)
        show_line(w, offset, prev, chunk, chunk_size)

        offset += len(prev)
        prev = chunk
        lines += 1


def show_line(w, n: int, prev, chunk, chunk_size: int) -> None:
    '''
    Help func show_hex do its job, simplifying its control flow.

    It emits a whole output row: the offset n, the styled hex cells for the
    bytes in prev, then the ASCII panel for both prev and chunk (the bytes
    for the row after this one), and finally a line-feed.
    '''

    show_restyled_uint(w, n, 8)
    w.write(' \x1b[48;5;254m')
    w.write(''.join(bytes2styled_hex[e] for e in prev))
    w.write('\x1b[0m')

    # rows with fewer bytes than usual need extra padding, each missing byte
    # being 3 columns wide, so ASCII panels start at the same column
    show_ascii(w, prev, chunk, 3 * (chunk_size - len(prev)) + 2)
    w.write('\n')


def show_restyled_uint(w, n: int, width: int) -> None:
    '''
    Alternate styles on 3-item chunks of digits from the integer given.

    The digits are right-aligned by left-padding them with spaces, up to the
    width given; numbers with more digits than that are emitted in full.
    '''

    digits = str(n)
    count = len(digits)

    # left-pad digits with spaces to fill the output-width given
    write_spaces(w, width - count)

    # it's quicker to just emit short-enough digit-runs verbatim
    if count < 4:
        w.write(digits)
        return

    # emit leading chunk of digits, which is the only one which
    # can have fewer than 3 items, and which is never styled
    lead = count % 3
    w.write(digits[:lead])

    # start by styling the next digit-group only if there was a
    # non-empty leading group at the start of the full digit-run
    use_style = lead > 0

    # the digits left are always a multiple of 3, so all groups are full
    for start in range(lead, count, 3):
        group = digits[start:start + 3]
        if use_style:
            w.write(f'\x1b[38;5;248m{group}\x1b[0m')
        else:
            w.write(group)
        use_style = not use_style


def show_ascii(w, first, second: bytes, pre: int) -> None:
    '''
    Emit the ASCII side-panel for func show_hex.

    Only runs of visible ASCII symbols from both byte-chunks given are
    emitted: the first run comes after `pre` spaces, while all later runs
    are separated from previous ones by a single space.
    '''

    # prev_vis keeps track of the previous byte's `visibility`, so spaces
    # are added when bytes change from non-visible-ASCII to visible-ASCII;
    # it's deliberately not reset between chunks, so runs can span both
    prev_vis = False

    # gap is how many spaces to emit before the next run of symbols
    gap = pre

    for part in (first, second):
        for e in part:
            is_vis = visible[e]
            if is_vis:
                if not prev_vis:
                    write_spaces(w, gap)
                    gap = 1
                w.write(int2ascii[e])
            prev_vis = is_vis


def write_spaces(w, n: int) -> None:
    '''
    Emit the number of spaces given, minimizing `write` calls.

    Counts below 1 emit nothing; counts beyond the pre-built `spaces` table
    are built on the spot, so the exact count is emitted in all cases.
    '''

    if n < 1:
        return

    if n < len(spaces):
        w.write(spaces[n])
    else:
        w.write(n * ' ')


def seems_url(s: str) -> bool:
    'Check if a string starts with any of the URI protocols supported.'

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return s.startswith(protocols)


def main(args) -> None:
    '''
    Run the script, using the command-line arguments given, which must not
    include the script name.

    A leading help option shows the help message and quits; a leading
    narrow option or a leading integer sets how many bytes each row shows;
    everything else is a filepath, a URI, or a dash, which means standard
    input. No arguments at all means reading from standard input.

    The script quits with code 1 on errors, and with code 2 when
    interrupted from the keyboard.
    '''

    args = list(args)

    # a leading help-option arg means show the help message and quit
    if args and args[0] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    # narrow-output is to fit results in 80-column mode
    bytes_per_line = 16
    if args and args[0] in ('-n', '--n', '-narrow', '--narrow'):
        bytes_per_line = 12
        args = args[1:]
    elif args:
        # allow a leading integer argument to set exactly how many bytes per
        # line to show in the styled output, before the ASCII-panel contents
        try:
            # ignore leading dashes, so option-style numbers work as well
            n = abs(int(args[0].lstrip('-')))
        except ValueError:
            # the leading arg isn't a valid integer, so it's kept as a name
            pass
        else:
            if n > 0:
                # only change the width-setting if leading number isn't zero
                bytes_per_line = n
            # don't treat a leading integer as a filepath, no matter what
            args = args[1:]

    try:
        if args.count('-') > 1:
            msg = 'reading from `-` (standard input) more than once not allowed'
            raise ValueError(msg)

        # only pay for importing the URI-related functionality when needed
        if any(seems_url(e) for e in args):
            from urllib.request import urlopen

        for i, path in enumerate(args):
            # results from different inputs are 2 empty lines apart
            if i > 0:
                stdout.write('\n\n')

            if path == '-':
                stdout.write('• - (<stdin>)\n\n')
                show_hex(stdout, stdin.buffer, bytes_per_line)
                continue

            if seems_url(path):
                with urlopen(path) as inp:
                    stdout.write(f'• {path}\n\n')
                    show_hex(stdout, inp, bytes_per_line)
                continue

            with open(path, mode='rb', buffering=4_960) as inp:
                n = fstat(inp.fileno()).st_size
                stdout.write(f'• {path} \x1b[38;5;245m({n:,} bytes)\x1b[0m\n')
                stdout.write('\n')
                show_hex(stdout, inp, bytes_per_line)

        if not args:
            stdout.write('• <stdin>\n\n')
            show_hex(stdout, stdin.buffer, bytes_per_line)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main(argv[1:])