#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# chex [options...] [filepaths/URIs...]
#
# Colored HEXadecimal byte-viewer shows file/input bytes as base-16 values,
# using various ANSI styles to color-code output.
#
# Output lines end with a panel showing all ASCII sequences detected along.
#
# Options, where leading double-dashes are also allowed:
#
#   -h          show this help message
#   -help       same as -h
#
#   -n          narrow output, which fits 80-column mode
#   -narrow     same as -n


from os import fstat
from sys import argv, exit, stderr, stdin, stdout
from urllib.request import urlopen


# info is the help message, shown when the leading argument is one of the
# standard cmd-line help options
info = '''
chex [options...] [filepaths/URIs...]

Colored HEXadecimal byte-viewer shows file/input bytes as base-16 values,
using various ANSI styles to color-code output.

Output lines end with a panel showing all ASCII sequences detected along.

Options, where leading double-dashes are also allowed:

    -h          show this help message
    -help       same as -h

    -n          narrow output, which fits 80-column mode
    -narrow     same as -n
'''.strip()


# byte2styled_hex has 256 `pre-rendered` strings, one for each possible byte:
# each entry is an ANSI color-code followed by 2 lowercase hex digits and a
# third symbol, so every byte takes exactly 3 columns when shown
#
#   - byte 0x00 has its own color, and ends with a space
#   - byte 0xff has its own color, and ends with a space
#   - bytes 0x20 to 0x7e (ASCII space to tilde) have their own color, and
#     end with a style-change followed by the ASCII symbol itself
#   - all other bytes share the same color, and end with a space
byte2styled_hex = (
    ('\x1b[38;5;111m00 ',)
    + tuple(f'\x1b[38;5;246m{code:02x} ' for code in range(0x01, 0x20))
    + tuple(
        f'\x1b[38;5;72m{code:02x}\x1b[38;5;239m{chr(code)}'
        for code in range(0x20, 0x7f)
    )
    + tuple(f'\x1b[38;5;246m{code:02x} ' for code in range(0x7f, 0xff))
    + ('\x1b[38;5;209mff ',)
)

# int2ascii slightly speeds up func show_ascii: it turns byte values into
# their ASCII symbols, using a space for all bytes outside codes 32 to 126
int2ascii = tuple(
    ' ' if code < 32 or code > 126 else chr(code) for code in range(256)
)

# visible noticeably speeds up func show_ascii; notice how spaces (code 32)
# aren't considered visible symbols, which makes sense in func show_ascii
visible = tuple(32 < i < 127 for i in range(256))


def show_hex(w, src, chunk_size: int = 16) -> None:
    '''
    Handle all input from the source given, emitting styled output.

    Parameter w is the text-writer all output goes to, src is the reader
    whose `read` method gives the bytes to show, and chunk_size is how many
    bytes each output line shows.
    '''

    # make the ruler/line-breather, which shows up every 5 hex-output lines;
    # integer division keeps the repeat-count exact, without using floats
    pre = 8 * ' '
    pat = ' ·'
    pat = (3 * chunk_size // len(pat)) * pat
    sep_line = f'{pre} \x1b[38;5;245m{pat}\x1b[0m\n'

    # n is the current byte offset shown at the start of each display line
    n = 0

    # lines keeps track of the main output line/row count, to figure out
    # when to put `breather` lines
    lines = 0

    # prev remembers the previous chunk, as showing ASCII content for
    # 2 output-lines worth of bytes requires staying 1 step behind, so
    # to speak
    prev = src.read(chunk_size)
    if len(prev) == 0:
        return

    while True:
        chunk = src.read(chunk_size)
        if len(chunk) == 0:
            break

        if lines % 5 == 0 and lines > 0:
            w.write(sep_line)
        show_line(w, n, prev, chunk, chunk_size)

        n += len(prev)
        prev = chunk
        lines += 1

    # don't forget the last output line: prev can't be empty at this point,
    # since it only ever holds non-empty chunks
    if lines % 5 == 0 and lines > 0:
        w.write(sep_line)
    show_line(w, n, prev, bytes(), chunk_size)


def show_line(w, n: int, prev, chunk, chunk_size: int) -> None:
    '''
    Help func show_hex do its job, simplifying its control flow.

    Parameter n is the byte-offset shown at the start of the line, prev has
    the bytes to show as hex values, chunk has the bytes which follow, used
    only for the ASCII panel, and chunk_size is the max byte-count per line.
    '''

    # looking up global vars is slower in older versions of python
    table = byte2styled_hex

    show_restyled_uint(w, n, 8)
    w.write(' \x1b[48;5;254m')
    # emit all styled hex values for the line using a single `write` call
    w.write(''.join([table[e] for e in prev]))
    w.write('\x1b[0m')
    # pad lines with fewer bytes than usual, so ASCII panels stay aligned
    show_ascii(w, prev, chunk, 3 * (chunk_size - len(prev)) + 2)
    w.write('\n')


def show_restyled_uint(w, n: int, width: int) -> None:
    '''
    Alternate styles on 3-item chunks of digits from the integer given.

    Parameter width is the min output width: shorter digit-runs are padded
    with leading spaces, while longer ones are emitted in full.
    '''

    digits = str(n)
    count = len(digits)

    # left-pad digits with spaces to fill the output-width given
    write_spaces(w, width - count)

    # it's quicker to just emit short-enough digit-runs verbatim
    if count < 4:
        w.write(digits)
        return

    # emit leading chunk of digits, which is the only one which
    # can have fewer than 3 items
    lead = count % 3
    w.write(digits[:lead])

    # start by styling the next digit-group only if there was a
    # non-empty leading group at the start of the full digit-run
    use_style = lead > 0

    # alternate styles until the string is over: the digits left are
    # always a multiple of 3
    for start in range(lead, count, 3):
        group = digits[start:start + 3]
        if use_style:
            w.write('\x1b[38;5;249m')
            w.write(group)
            w.write('\x1b[0m')
        else:
            w.write(group)
        use_style = not use_style


def show_ascii(w, first, second: bytes, pre: int) -> None:
    '''
    Emit the ASCII side-panel for func show_hex.

    Parameters first and second are the bytes from the current output line
    and from the one after it, while pre is how many spaces to emit before
    the first run of visible symbols, if there's any.
    '''

    # looking up global vars is slower in older versions of python
    vis = visible
    table = int2ascii
    write_spaces_l = write_spaces

    # prev_vis keeps track of the previous byte's `visibility`, so spaces
    # are added when bytes change from non-visible-ASCII to visible-ASCII
    prev_vis = False

    # gap is how many spaces to emit before the next run of visible symbols
    gap = pre

    # show ASCII symbols from both `lines` in the pair, one after the other:
    # prev_vis carries over, so runs can continue across the 2 `lines`
    for group in (first, second):
        for e in group:
            is_vis = vis[e]
            if is_vis:
                if not prev_vis:
                    write_spaces_l(w, gap)
                    gap = 1
                w.write(table[e])
            prev_vis = is_vis


def write_spaces(w, n: int) -> None:
    '''
    Emit the number of spaces given, minimizing `write` calls.

    Non-positive counts emit nothing.
    '''

    if n < 1:
        return

    # looking up global vars is slower in older versions of python
    buf = spaces

    # item i from the lookup table has exactly i spaces
    if n < len(buf):
        w.write(buf[n])
        return

    # runs too long for the lookup table are rare: making these directly
    # emits exactly n spaces, while looping over the table's last item needs
    # care, since that item has 1 fewer space than the table has items
    w.write(n * ' ')


def seems_url(s: str) -> bool:
    '''Check if the string given starts with a supported URI protocol.'''

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return s.startswith(protocols)


# args is the `proper` list of arguments given to the script
args = argv[1:]

# a leading help-option arg means show the help message and quit
if len(args) > 0 and args[0].lower() in ('-h', '--h', '-help', '--help'):
    print(info, file=stderr)
    exit(0)

# narrow-output is to fit results in 80-column mode
bytes_per_line = 16
if len(args) > 0 and args[0].lower() in ('-n', '--n', '-narrow', '--narrow'):
    bytes_per_line = 12
    args = args[1:]
elif len(args) > 0:
    # allow a leading integer argument to set exactly how many bytes per
    # line to show in the styled output, before the ASCII-panel contents
    try:
        # try to parse an integer number, after ignoring all option-style
        # leading dashes; a leftover sign is dropped by taking the abs value
        n = abs(int(args[0].lstrip('-')))
    except ValueError:
        # leading arg isn't a valid integer, so it's kept as an input name
        pass
    else:
        if n > 0:
            # only change the width-setting if leading number isn't zero
            bytes_per_line = n
        # don't treat a leading integer as a filepath, no matter what
        args = args[1:]

# spaces lets func write_spaces minimize `write` operations, resulting in
# noticeable speed-ups when the script deals with megabytes of data
spaces = tuple(i * ' ' for i in range(3 * bytes_per_line + 4))

try:
    stdout.reconfigure(newline='\n', encoding='utf-8')

    if args.count('-') > 1:
        msg = 'reading from `-` (standard input) more than once not allowed'
        raise ValueError(msg)

    # handle all/any named files given
    for i, path in enumerate(args):
        # put some empty lines between separate files
        if i > 0:
            stdout.write('\n\n')

        if path == '-':
            stdout.write('• - (<stdin>)\n')
            stdout.write('\n')
            show_hex(stdout, stdin.buffer, bytes_per_line)
            continue

        if seems_url(path):
            with urlopen(path) as inp:
                stdout.write(f'• {path}\n')
                stdout.write('\n')
                show_hex(stdout, inp, bytes_per_line)
            continue

        with open(path, mode='rb', buffering=4_960) as inp:
            n = fstat(inp.fileno()).st_size
            stdout.write(f'• {path} \x1b[38;5;245m({n:,} bytes)\x1b[0m\n')
            stdout.write('\n')
            show_hex(stdout, inp, bytes_per_line)

    # read from stdin, if no input files were given
    if len(args) == 0:
        stdout.write('• <stdin>\n')
        stdout.write('\n')
        show_hex(stdout, stdin.buffer, bytes_per_line)
except BrokenPipeError:
    # quit quietly, instead of showing a confusing error message
    stderr.flush()
    stderr.close()
except KeyboardInterrupt:
    # quit quietly, instead of showing a confusing error message
    stderr.flush()
    stderr.close()
    exit(2)
except Exception as e:
    print(f'\x1b[31m{e}\x1b[0m', file=stderr)
    exit(1)