// File: nh.c
/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./nh ./nh.c

Building with COMPACT_OUTPUT defined makes `nh` output many fewer bytes, at
the cost of using arguably worse colors. You can do that by running

cc -Wall -s -O2 -D COMPACT_OUTPUT -o ./nh ./nh.c
*/

#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#ifdef _WIN32
#include <windows.h>
#endif

// #define COMPACT_OUTPUT

// info is the multi-line help message
const char* info = ""
"nh [options...] [filenames...]\n"
"\n"
"Nice Hexadecimal is a simple hexadecimal (base-16) viewer to inspect bytes\n"
"from files or standard input.\n"
"\n"
"Each line shows the starting offset for the bytes shown, 16 of the bytes\n"
"themselves in base-16 notation, and any ASCII codes when the byte values\n"
"are in the typical ASCII range.\n"
"\n"
"The base-16 codes are color-coded, with most bytes shown in gray, while\n"
"all-1 and all-0 bytes are shown in orange and blue respectively.\n"
"\n"
"All-0 bytes are the commonest kind in most binary file types and, along\n"
"with all-1 bytes are also a special case worth noticing when exploring\n"
"binary data, so it makes sense for them to stand out right away.\n"
"\n"
"\n"
"Options\n"
"\n"
" -h, --h show this help message\n"
" -help, --help aliases for option -h\n"
"\n"
" -p, --p plain-text output, without ANSI styles\n"
" -plain, --plain aliases for option -p\n"
"\n"
" -ho, --ho show hex (base-16) offsets, instead of base-10 ones\n"
"";

#ifdef COMPACT_OUTPUT
#define OUTPUT_FOR_00 "\x1b[34m00 "
#define OUTPUT_FOR_FF "\x1b[33mff "
#define NORMAL_HEX_STYLE "\x1b[37m"
#define ASCII_HEX_STYLE "\x1b[32m"
#define ASCII_BYTE_STYLE "\x1b[30m"
#else
#define OUTPUT_FOR_00 "\x1b[38;5;111m00 "
#define OUTPUT_FOR_FF "\x1b[38;5;209mff "
#define NORMAL_HEX_STYLE "\x1b[38;5;246m"
#define ASCII_HEX_STYLE "\x1b[38;5;72m"
#define ASCII_BYTE_STYLE "\x1b[38;5;239m"
#endif

// EMIT_CONST abstracts emitting string constants without their final null
// byte; x must be a string literal (or char array), since sizeof is used
#define EMIT_CONST(w, x) fwrite((x), sizeof(x) - 1, 1, (w))

enum {
    // OFFSET_WIDTH is the minimum width of the offset shown on each line
    OFFSET_WIDTH = 8,
    // DECIMAL_BUF_SIZE fits the 20 digits 64-bit unsigned ints may need
    DECIMAL_BUF_SIZE = 24,
    // COLUMNS_PER_BYTE is how many output columns each shown byte takes:
    // 2 hex digits, followed by either a space or the ASCII symbol
    COLUMNS_PER_BYTE = 3
};

// hex_digits maps 4-bit values to their lowercase base-16 symbols
static const char hex_digits[] = "0123456789abcdef";

// write_bytes emits the first len bytes from src; it's `static inline`
// because a plain `inline` definition provides no external definition in
// C99/C11, which fails to link whenever the compiler chooses not to inline
static inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    fwrite(src, len, 1, w);
}

// write_repeated emits the same byte the number of times given
static void write_repeated(FILE* w, int c, size_t times) {
    for (size_t k = 0; k < times; k++) {
        putc(c, w);
    }
}

// fill_decimal_digits writes the base-10 digits of n right-aligned at the
// end of buf, which has cap bytes, and returns how many digits it wrote:
// there's always at least 1 digit, even when n is 0
static size_t fill_decimal_digits(unsigned char* buf, size_t cap, size_t n) {
    size_t digits = 0;
    do {
        buf[cap - 1 - digits] = (unsigned char)('0' + n % 10);
        digits++;
        n /= 10;
    } while (n > 0);
    return digits;
}

// write_hex is faster than calling fprintf(w, "%02x", b): this matters
// because it's called for every input byte
void write_hex(FILE* w, unsigned char b) {
    putc(hex_digits[b >> 4], w);
    putc(hex_digits[b & 0x0f], w);
}

// write_styled_hex emits an ANSI color-coded hexadecimal representation
// of the byte given; the result always takes 3 visible columns
void write_styled_hex(FILE* w, unsigned char b) {
    // all-bits-off is almost always noteworthy
    if (b == 0) {
        EMIT_CONST(w, OUTPUT_FOR_00);
        return;
    }
    // all-bits-on is often noteworthy
    if (b == 0xff) {
        EMIT_CONST(w, OUTPUT_FOR_FF);
        return;
    }

    // regular ASCII display symbols: the symbol itself takes the place of
    // the space which normally follows the 2 hex digits
    if (32 <= b && b <= 126) {
        EMIT_CONST(w, ASCII_HEX_STYLE);
        write_hex(w, b);
        EMIT_CONST(w, ASCII_BYTE_STYLE);
        putc(b, w);
        return;
    }

    // ASCII control values, and other bytes beyond displayable ASCII
    EMIT_CONST(w, NORMAL_HEX_STYLE);
    write_hex(w, b);
    putc(' ', w);
}

// ruler emits a ruler-like string of spaced-out symbols: one tick for each
// full group of 4 bytes a line can show, and nothing for narrower lines
void ruler(FILE* w, size_t bytes_per_line) {
    const size_t gap = 4;
    const size_t ticks = bytes_per_line / gap;

    // NOTE(review): each tick is a single space followed by the dot, while
    // each byte takes 3 columns; confirm the intended spacing of this literal
    for (size_t k = 0; k < ticks; k++) {
        EMIT_CONST(w, " ·");
    }
}

// write_commas_uint shows a number by separating 3-digits groups with commas
void write_commas_uint(FILE* w, size_t n) {
    unsigned char buf[DECIMAL_BUF_SIZE];
    size_t digits = fill_decimal_digits(buf, sizeof(buf), n);

    // now emit the leading digits, which may not come in 3
    size_t leading = digits % 3;
    if (leading == 0) {
        // avoid having a comma before the first digit
        leading = 3;
    }
    const unsigned char* start = buf + sizeof(buf) - digits;
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // now emit all remaining digits in groups of 3, each after a comma
    for (; digits > 0; start += 3, digits -= 3) {
        putc(',', w);
        write_bytes(w, start, 3);
    }
}

// output_state ties all values representing the current state shared across
// all functions involved in interpreting the input-buffer and showing its
// bytes and ASCII values
typedef struct output_state {
    // the whole input-buffer and its currently-used length in bytes
    unsigned char* buf;
    size_t buflen;

    // the ASCII-text buffer and its currently-used length in bytes
    unsigned char* txt;
    size_t txtlen;

    // offset is the byte counter, shown at the start of each line
    size_t offset;

    // linewidth is how many bytes each line can show at most
    size_t linewidth;

    // lines is the line counter, which is used to provide periodic
    // breather lines, to make eye-scanning big output blobs easier
    size_t lines;

    // emit_offset is chosen to emit the offset at the start of each line
    void (*emit_offset)(FILE* w, size_t offset);

    // showtxt is a hint on whether it's sensible to show the ASCII-text
    // buffer for the current line
    bool showtxt;
} output_state;

// peek_ascii scans bytes from index i up to (excluding) index end of the
// input buffer, collecting all ASCII-like runs of bytes into os->txt, which
// are later meant to show on the side panel; runs are separated by single
// spaces. Callers are expected to give it a window of up to 2 lines, and a
// txt buffer which can hold as many bytes as the window has.
void peek_ascii(size_t i, size_t end, output_state* os) {
    unsigned char prev = 0;
    os->txtlen = 0;
    // never let a previous window's hint (or an uninitialized value) leak
    // into lines which have no symbols to show
    os->showtxt = false;

    for (size_t j = i; j < end; j++) {
        const unsigned char b = os->buf[j];

        if (' ' < b && b <= '~') {
            const bool first = os->txtlen == 0;
            if (first) {
                // show ASCII panel, if the symbols start on the current line
                os->showtxt = j - i < os->linewidth;
            }

            // add a space before the symbol, when it's the start of a `word`
            if ((prev <= ' ' || prev > '~') && !first) {
                os->txt[os->txtlen] = ' ';
                os->txtlen++;
            }

            // add the symbol itself
            os->txt[os->txtlen] = b;
            os->txtlen++;
        }

        prev = b;
    }
}

// write_plain_uint is the unstyled counterpart of func write_styled_uint:
// it emits n in base-10, left-padded with spaces up to 8 chars
void write_plain_uint(FILE* w, size_t n) {
    unsigned char buf[DECIMAL_BUF_SIZE];
    const size_t digits = fill_decimal_digits(buf, sizeof(buf), n);

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_repeated(w, ' ', OFFSET_WIDTH - digits);
    }

    // emit all digits
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_hex_uint emits n in base-16, using whole bytes (pairs of digits),
// and left-padded with zeros up to 8 chars
void write_hex_uint(FILE* w, size_t n) {
    // 16 is the most hex digits unsigned 64-bit ints can ever need
    unsigned char buf[DECIMAL_BUF_SIZE];
    size_t digits = 0;
    for (; n > 0; digits += 2, n /= 256) {
        const unsigned char b = n % 256;
        buf[sizeof(buf) - 1 - digits - 1] = hex_digits[b >> 4];
        buf[sizeof(buf) - 1 - digits - 0] = hex_digits[b & 0x0f];
    }

    // left-pad the coming digits up to 8 chars: this also handles zero,
    // which has no digits at this point
    if (digits < OFFSET_WIDTH) {
        write_repeated(w, '0', OFFSET_WIDTH - digits);
    }

    // emit all digits
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_styled_uint is a quick way to emit the offset-counter showing at the
// start of each line; it assumes 8-item left-padding of values, unless the
// numbers are too big for that
void write_styled_uint(FILE* w, size_t n) {
    unsigned char buf[DECIMAL_BUF_SIZE];
    size_t digits = fill_decimal_digits(buf, sizeof(buf), n);

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_repeated(w, ' ', OFFSET_WIDTH - digits);
    }

    // now emit the leading digits, which may be fewer than 3
    const size_t leading = digits % 3;
    const unsigned char* start = buf + sizeof(buf) - digits;
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // now emit all remaining digits in groups of 3, alternating styles
    bool styled = leading != 0;
    for (; digits > 0; start += 3, digits -= 3, styled = !styled) {
        if (styled) {
            EMIT_CONST(w, "\x1b[38;5;248m");
            write_bytes(w, start, 3);
            EMIT_CONST(w, "\x1b[0m");
        } else {
            write_bytes(w, start, 3);
        }
    }
}

// emit_styled_file_info emits an ANSI-styled line showing a filename and the
// file's size in bytes
void emit_styled_file_info(FILE* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " \x1b[38;5;245m(");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\x1b[0m\n");
}

// emit_plain_file_info is the unstyled counterpart of func emit_styled_file_info
void emit_plain_file_info(FILE* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " (");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\n");
}

// emit_styled_line handles the details of showing a styled line out of the
// current input-buffer chunk: it shows bytes from index i up to (excluding)
// index end, advancing os->offset once per byte, and ends the line with the
// ASCII panel when os->showtxt is set
void emit_styled_line(FILE* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show a ruler every few lines to make eye-scanning easier
            if (os->lines % 5 == 0 && os->lines > 0) {
                EMIT_CONST(w, " \x1b[38;5;245m");
                ruler(w, os->linewidth);
                EMIT_CONST(w, "\x1b[0m\n");
            }
            os->lines++;

            // start next line with offset of its 1st item, also
            // changing the background color for the colored hex
            // code which will follow
            os->emit_offset(w, os->offset);
            EMIT_CONST(w, " \x1b[48;5;254m");
        }

        // show the current byte `with style`
        write_styled_hex(w, b);
    }

    if (os->showtxt) {
        EMIT_CONST(w, "\x1b[0m ");
        // pad short lines, so the ASCII panel stays aligned: each missing
        // byte would have taken 3 columns
        for (size_t j = end - i; j < os->linewidth; j++) {
            write_repeated(w, ' ', COLUMNS_PER_BYTE);
        }

        write_bytes(w, os->txt, os->txtlen);
        putc('\n', w);
        return;
    }
    EMIT_CONST(w, "\x1b[0m\n");
}

// emit_plain_line handles the details of showing a plain (unstyled) line out
// of the current input-buffer chunk: it shows bytes from index i up to
// (excluding) index end, advancing os->offset once per byte, and ends the
// line with the ASCII panel when os->showtxt is set
void emit_plain_line(FILE* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show an empty line every few lines to make eye-scanning easier
            if (os->lines % 5 == 0 && os->lines > 0) {
                putc('\n', w);
            }
            os->lines++;

            // start next line with offset of its 1st item: the configured
            // offset-emitter can't be called blindly, since the default one
            // emits ANSI styles, but hex offsets are plain and thus honored
            if (os->emit_offset == write_hex_uint) {
                write_hex_uint(w, os->offset);
            } else {
                write_plain_uint(w, os->offset);
            }
            EMIT_CONST(w, " ");
        }

        // show the current byte, followed by its separator
        write_hex(w, b);
        putc(' ', w);
    }

    if (os->showtxt) {
        EMIT_CONST(w, " ");
        // pad short lines, so the ASCII panel stays aligned: each missing
        // byte would have taken 3 columns
        for (size_t j = end - i; j < os->linewidth; j++) {
            write_repeated(w, ' ', COLUMNS_PER_BYTE);
        }

        write_bytes(w, os->txt, os->txtlen);
        putc('\n', w);
        return;
    }
    putc('\n', w);
}

436 // config has all the settings used to emit output 437 typedef struct config { 438 // bytes_per_line determines the `width` of output lines 439 size_t bytes_per_line; 440 441 // emit_file_info is chosen to emit file-info with colors or plainly 442 void (*emit_file_info)(FILE* w, const char* path, size_t nbytes); 443 444 // emit_line is chosen to emit hex bytes with colors or plainly 445 void (*emit_line)(FILE* w, size_t i, size_t end, output_state* os); 446 447 // emit_offset is chosen to emit the offset at the start of each line 448 void (*emit_offset)(FILE* w, size_t offset); 449 } config; 450 451 bool has_line_feeds(const unsigned char* buf, size_t len) { 452 for (size_t i = 0; i < len; i++) { 453 if (buf[i] == 10) { 454 return true; 455 } 456 } 457 return false; 458 } 459 460 // handle_reader shows all bytes read from the source given as colored hex 461 // values, showing offsets and ASCII symbols on the sides of each output line 462 void handle_reader(FILE* w, FILE* src, config cfg) { 463 const size_t bufcap = 32 * 1024; 464 // limit line-width to the buffer's capacity 465 if (cfg.bytes_per_line > bufcap) { 466 cfg.bytes_per_line = bufcap; 467 } 468 469 const size_t two_lines = 2 * cfg.bytes_per_line; 470 unsigned char txt[two_lines]; 471 472 unsigned char buf[bufcap]; 473 // ensure the effective buffer-size is a multiple of the line-width 474 size_t max = bufcap - bufcap % cfg.bytes_per_line; 475 476 output_state os; 477 os.buf = buf; 478 os.linewidth = cfg.bytes_per_line; 479 os.lines = 0; 480 os.offset = 0; 481 os.txt = txt; 482 os.emit_offset = cfg.emit_offset; 483 484 const size_t one_line = cfg.bytes_per_line; 485 486 while (!feof(w)) { 487 os.buflen = fread(&buf, sizeof(buf[0]), max, src); 488 if (os.buflen < 1) { 489 // assume input is over when no bytes were read 490 fflush(w); 491 return; 492 } 493 494 for (size_t i = 0; i < os.buflen; i += one_line) { 495 size_t end; 496 497 // remember all ASCII symbols in current pair of output lines 498 end = 
i + two_lines < os.buflen ? i + two_lines : os.buflen; 499 peek_ascii(i, end, &os); 500 501 // show current output line 502 end = i + one_line < os.buflen ? i + one_line : os.buflen; 503 cfg.emit_line(w, i, end, &os); 504 } 505 506 if (has_line_feeds(buf, os.buflen)) { 507 fflush(w); 508 } 509 } 510 } 511 512 // handle_file handles data from the filename given; returns false only when 513 // the file can't be opened 514 bool handle_file(FILE* w, const char* path, config cfg) { 515 FILE* f = fopen(path, "rb"); 516 if (f == NULL) { 517 // ensure currently-buffered/deferred output shows up right now: not 518 // doing so may scramble results in the common case where stdout and 519 // stderr are the same, thus confusing users 520 fflush(w); 521 522 fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path); 523 return false; 524 } 525 526 // get the file size 527 struct stat st; 528 fstat(fileno(f), &st); 529 530 // show output 531 cfg.emit_file_info(w, path, st.st_size); 532 EMIT_CONST(w, "\n"); 533 handle_reader(w, f, cfg); 534 535 fclose(f); 536 return true; 537 } 538 539 // is_help_option simplifies control-flow for func run 540 bool is_help_option(char* s) { 541 return (s[0] == '-') && ( 542 strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 || 543 strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0 544 ); 545 } 546 547 // is_plain_option simplifies control-flow for func run 548 bool is_plain_option(char* s) { 549 return (s[0] == '-') && ( 550 strcmp(s, "-p") == 0 || strcmp(s, "-plain") == 0 || 551 strcmp(s, "--p") == 0 || strcmp(s, "--plain") == 0 552 ); 553 } 554 555 // is_hex_offsets simplifies control-flow for func run 556 bool is_hex_offsets_option(char* s) { 557 return (s[0] == '-') && ( 558 strcmp(s, "-ho") == 0 || strcmp(s, "--ho") == 0 || 559 strcmp(s, "-hexoffsets") == 0 || strcmp(s, "--hexoffsets") == 0 || 560 strcmp(s, "-hex-offsets") == 0 || strcmp(s, "--hex-offsets") == 0 561 ); 562 } 563 564 // run returns the number of errors 565 int run(int 
argc, char** argv, FILE* w) { 566 config cfg; 567 cfg.bytes_per_line = 16; 568 cfg.emit_line = &emit_styled_line; 569 cfg.emit_file_info = &emit_styled_file_info; 570 cfg.emit_offset = &write_styled_uint; 571 572 size_t files = 0; 573 size_t errors = 0; 574 575 // handle all filenames/options given 576 for (size_t i = 1; i < argc && !feof(w); i++) { 577 // a `-` filename stands for the standard input 578 if (argv[i][0] == '-' && argv[i][1] == 0) { 579 EMIT_CONST(w, "• <stdin>\n"); 580 EMIT_CONST(w, "\n"); 581 handle_reader(w, stdin, cfg); 582 continue; 583 } 584 585 if (is_help_option(argv[i])) { 586 // help option quits the app right away 587 fprintf(stderr, "%s", info); 588 return 0; 589 } 590 591 if (is_plain_option(argv[i])) { 592 cfg.emit_line = &emit_plain_line; 593 cfg.emit_file_info = &emit_plain_file_info; 594 continue; 595 } 596 597 if (is_hex_offsets_option(argv[i])) { 598 cfg.emit_offset = &write_hex_uint; 599 continue; 600 } 601 602 if (files > 0) { 603 // put an empty line between adjacent hex outputs 604 putc('\n', w); 605 } 606 607 if (!handle_file(w, argv[i], cfg)) { 608 errors++; 609 } 610 files++; 611 } 612 613 // no filenames means use stdin as the only input 614 if (files == 0) { 615 EMIT_CONST(w, "• <stdin>\n"); 616 EMIT_CONST(w, "\n"); 617 handle_reader(w, stdin, cfg); 618 } 619 620 return errors; 621 } 622 623 int main(int argc, char** argv) { 624 #ifdef _WIN32 625 setmode(fileno(stdin), O_BINARY); 626 // ensure output lines end in LF instead of CRLF on windows 627 setmode(fileno(stdout), O_BINARY); 628 setmode(fileno(stderr), O_BINARY); 629 #endif 630 631 return run(argc, argv, stdout) == 0 ? 0 : 1; 632 }