/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./nh ./nh.c

Building with COMPACT_OUTPUT defined makes `nh` output many fewer bytes, at
the cost of using arguably worse colors. You can do that by running

cc -Wall -s -O2 -D COMPACT_OUTPUT -o ./nh ./nh.c
*/

// fileno is a POSIX function, not an ISO C one: ask for it explicitly, so
// strict builds (such as `cc -std=c11`) still get its declaration
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

// #define COMPACT_OUTPUT

// info is the multi-line help message
const char* info = ""
"nh [options...] [filenames...]\n"
"\n"
"Nice Hexadecimal is a simple hexadecimal (base-16) viewer to inspect bytes\n"
"from files or standard input.\n"
"\n"
"Each line shows the starting offset for the bytes shown, 16 of the bytes\n"
"themselves in base-16 notation, and any ASCII codes when the byte values\n"
"are in the typical ASCII range.\n"
"\n"
"The base-16 codes are color-coded, with most bytes shown in gray, while\n"
"all-1 and all-0 bytes are shown in orange and blue respectively.\n"
"\n"
"All-0 bytes are the commonest kind in most binary file types and, along\n"
"with all-1 bytes are also a special case worth noticing when exploring\n"
"binary data, so it makes sense for them to stand out right away.\n"
"\n"
"\n"
"Options\n"
"\n"
"    -h, --h            show this help message\n"
"    -help, --help      aliases for option -h\n"
"\n"
"    -p, --p            plain-text output, without ANSI styles\n"
"    -plain, --plain    aliases for option -p\n"
"\n"
"    -ho, --ho          show hex (base-16) offsets, instead of base-10 ones\n"
"";

#ifdef COMPACT_OUTPUT
#define OUTPUT_FOR_00 "\x1b[34m00 "
#define OUTPUT_FOR_FF "\x1b[33mff "
#define NORMAL_HEX_STYLE "\x1b[37m"
#define ASCII_HEX_STYLE "\x1b[32m"
#define ASCII_BYTE_STYLE "\x1b[30m"
#define ASCII_WS_STYLE "\x1b[36m"
#else
#define OUTPUT_FOR_00 "\x1b[38;2;135;175;255m00 "
#define OUTPUT_FOR_FF "\x1b[38;2;255;135;95mff "
#define NORMAL_HEX_STYLE "\x1b[38;2;148;148;148m"
#define ASCII_HEX_STYLE "\x1b[38;2;102;175;135m"
#define ASCII_BYTE_STYLE "\x1b[38;2;78;78;78m"
#define ASCII_WS_STYLE "\x1b[38;2;6;152;154m"
#endif

// layout and sizing constants shared by the functions below
enum {
    // OFFSET_WIDTH is how many columns offsets are padded to
    OFFSET_WIDTH = 8,
    // CELL_WIDTH is how many columns each byte takes on a line: 2 hex
    // digits, followed by either a space or the byte's ASCII symbol
    CELL_WIDTH = 3,
    // RULER_GAP is how many bytes apart the ruler's tick marks are
    RULER_GAP = 4,
    // LINES_PER_GROUP is how many lines are emitted between breather lines
    LINES_PER_GROUP = 5,
    // UINT_BUF_SIZE is enough for the 20 digits 64-bit uints can ever need
    UINT_BUF_SIZE = 24,
    // BUFFER_CAPACITY is the size of the input buffer, in bytes
    BUFFER_CAPACITY = 32 * 1024,
    // STDOUT_BUFFER_SIZE is the size asked for when fully-buffering stdout
    STDOUT_BUFFER_SIZE = 64 * 1024
};

// hex_digits maps 4-bit values into their lowercase base-16 symbols
static const char hex_digits[] = "0123456789abcdef";

// EMIT_CONST abstracts emitting string constants without their final null byte
#define EMIT_CONST(w, x) fwrite((x), sizeof(x) - 1, 1, (w))

// write_bytes emits the `len` bytes starting at `src`; empty writes are
// skipped
void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    if (len > 0) {
        fwrite(src, len, 1, w);
    }
}

// write_repeat emits the byte given `count` times: it's used for padding,
// so that alignment never depends on the length of a string literal
static void write_repeat(FILE* w, char c, size_t count) {
    for (; count > 0; count--) {
        putc(c, w);
    }
}

// format_decimal right-aligns the base-10 digits of `n` into the buffer
// given, whose size must be UINT_BUF_SIZE; it returns how many digits it
// wrote, which is always at least 1, since 0 results in a single `0`
static size_t format_decimal(unsigned char* buf, size_t n) {
    size_t digits = 0;
    do {
        buf[UINT_BUF_SIZE - 1 - digits] = (unsigned char)('0' + n % 10);
        digits++;
        n /= 10;
    } while (n > 0);
    return digits;
}

// write_hex is faster than calling fprintf(w, "%02x", b): this matters
// because it's called for every input byte
void write_hex(FILE* w, unsigned char b) {
    putc(hex_digits[b >> 4], w);
    putc(hex_digits[b & 0x0f], w);
}

// write_styled_hex emits an ANSI color-coded hexadecimal representation
// of the byte given; every byte results in exactly CELL_WIDTH columns
void write_styled_hex(FILE* w, unsigned char b) {
    // all-bits-off is almost always noteworthy
    if (b == 0) {
        EMIT_CONST(w, OUTPUT_FOR_00);
        return;
    }

    // all-bits-on is often noteworthy
    if (b == 0xff) {
        EMIT_CONST(w, OUTPUT_FOR_FF);
        return;
    }

    // regular ASCII display symbols: these show right after their hex code
    if (33 <= b && b <= 126) {
        EMIT_CONST(w, ASCII_HEX_STYLE);
        write_hex(w, b);
        EMIT_CONST(w, ASCII_BYTE_STYLE);
        putc(b, w);
        return;
    }

    // ASCII whitespace
    if (b == ' ' || b == '\n' || b == '\t' || b == '\r') {
        EMIT_CONST(w, ASCII_WS_STYLE);
        write_hex(w, b);
        EMIT_CONST(w, ASCII_BYTE_STYLE);
        putc(' ', w);
        return;
    }

    // ASCII control values, and other bytes beyond displayable ASCII
    EMIT_CONST(w, NORMAL_HEX_STYLE);
    write_hex(w, b);
    putc(' ', w);
}

// ruler emits a ruler-like string of spaced-out symbols: there's a tick
// mark on the last column of every RULER_GAP-th byte cell, and nothing at
// all when lines are narrower than RULER_GAP bytes
void ruler(FILE* w, size_t bytes_per_line) {
    for (size_t ticks = bytes_per_line / RULER_GAP; ticks > 0; ticks--) {
        write_repeat(w, ' ', RULER_GAP * CELL_WIDTH - 1);
        EMIT_CONST(w, "·");
    }
}

// write_commas_uint shows a number by separating 3-digits groups with commas
void write_commas_uint(FILE* w, size_t n) {
    unsigned char buf[UINT_BUF_SIZE];
    size_t digits = format_decimal(buf, n);
    const unsigned char* start = buf + sizeof(buf) - digits;

    // emit the leading digits, which may not come in 3: when the digit
    // count is a multiple of 3, the first group is a full one, which
    // avoids having a comma before the first digit
    size_t leading = digits % 3;
    if (leading == 0) {
        leading = 3;
    }
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // emit all remaining digits in groups of 3, each after a comma
    for (; digits > 0; start += 3, digits -= 3) {
        putc(',', w);
        write_bytes(w, start, 3);
    }
}

// output_state ties all values representing the current state shared across
// all functions involved in interpreting the input-buffer and showing its
// bytes and ASCII values
typedef struct output_state {
    // the whole input-buffer and its currently-used length in bytes
    unsigned char* buf;
    size_t buflen;

    // the ASCII-text buffer and its currently-used length in bytes
    unsigned char* txt;
    size_t txtlen;

    // offset is the byte counter, shown at the start of each line
    size_t offset;

    // linewidth is how many bytes each line can show at most
    size_t linewidth;

    // lines is the line counter, which is used to provide periodic
    // breather lines, to make eye-scanning big output blobs easier
    size_t lines;

    // emit_offset is chosen to emit the offset at the start of each line
    void (*emit_offset)(FILE* w, size_t offset);

    // showtxt is a hint on whether it's sensible to show the ASCII-text
    // buffer for the current line
    bool showtxt;
} output_state;

// peek_ascii looks 2 lines ahead in the buffer to get all ASCII-like runs
// of bytes, which are later meant to show on the side panel; it scans the
// input-buffer from index `i` up to (excluding) index `end`, and updates
// the text buffer, its length, and the showtxt hint
//
// The text buffer never needs more room than the number of bytes scanned,
// since each separating space comes from a skipped non-symbol byte.
void peek_ascii(size_t i, size_t end, output_state* os) {
    unsigned char prev = 0;
    os->txtlen = 0;
    // without this reset, lines with no symbols to show would reuse the
    // hint from a previous line, or even read it before it's ever set
    os->showtxt = false;

    for (size_t j = i; j < end; j++) {
        const unsigned char b = os->buf[j];

        if (' ' < b && b <= '~') {
            const bool first = os->txtlen == 0;
            if (first) {
                // show ASCII panel, if the symbols start on the current line
                os->showtxt = j - i < os->linewidth;
            }

            // add a space before the symbol, when it's the start of a `word`
            if ((prev <= ' ' || prev > '~') && !first) {
                os->txt[os->txtlen] = ' ';
                os->txtlen++;
            }

            // add the symbol itself
            os->txt[os->txtlen] = b;
            os->txtlen++;
        }

        prev = b;
    }
}

// write_plain_uint is the unstyled counterpart of func write_styled_uint
void write_plain_uint(FILE* w, size_t n) {
    unsigned char buf[UINT_BUF_SIZE];
    const size_t digits = format_decimal(buf, n);

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_repeat(w, ' ', OFFSET_WIDTH - digits);
    }

    // emit all digits
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_hex_uint emits the number given in base-16, using an even number
// of digits, and left-padding with zeros up to 8 chars
void write_hex_uint(FILE* w, size_t n) {
    // 16 is the most hex digits unsigned 64-bit ints can ever need
    unsigned char buf[UINT_BUF_SIZE];
    size_t digits = 0;

    // fill the buffer backward, 1 byte (2 hex digits) at a time
    for (; n > 0; n >>= 8, digits += 2) {
        const unsigned char b = (unsigned char)(n & 0xff);
        buf[sizeof(buf) - 1 - digits] = hex_digits[b & 0x0f];
        buf[sizeof(buf) - 2 - digits] = hex_digits[b >> 4];
    }

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_repeat(w, '0', OFFSET_WIDTH - digits);
    }

    // emit all digits
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_styled_uint is a quick way to emit the offset-counter showing at the
// start of each line; it assumes 8-item left-padding of values, unless the
// numbers are too big for that
void write_styled_uint(FILE* w, size_t n) {
    unsigned char buf[UINT_BUF_SIZE];
    size_t digits = format_decimal(buf, n);

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_repeat(w, ' ', OFFSET_WIDTH - digits);
    }

    // emit the leading digits, which may be fewer than 3, or even none
    const size_t leading = digits % 3;
    const unsigned char* start = buf + sizeof(buf) - digits;
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // emit all remaining digits in groups of 3, alternating styles: the
    // first full group is styled only when it follows leading digits
    bool styled = leading != 0;
    for (; digits > 0; start += 3, digits -= 3, styled = !styled) {
        if (!styled) {
            write_bytes(w, start, 3);
            continue;
        }

#ifdef COMPACT_OUTPUT
        EMIT_CONST(w, "\x1b[38;5;248m");
#else
        EMIT_CONST(w, "\x1b[38;2;168;168;168m");
#endif
        write_bytes(w, start, 3);
        EMIT_CONST(w, "\x1b[0m");
    }
}

// emit_styled_file_info emits an ANSI-styled line showing a filename and the
// file's size in bytes
void emit_styled_file_info(FILE* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
#ifdef COMPACT_OUTPUT
    EMIT_CONST(w, " \x1b[38;5;245m(");
#else
    EMIT_CONST(w, " \x1b[38;2;138;138;138m(");
#endif
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\x1b[0m\n");
}

// emit_plain_file_info is the unstyled counterpart of func emit_styled_file_info
void emit_plain_file_info(FILE* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " (");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\n");
}

// emit_styled_line handles the details of showing a styled line out of the
// current input-buffer chunk: it shows the bytes from index `i` up to
// (excluding) index `end`, advancing the offset counter as it goes
void emit_styled_line(FILE* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show a ruler every few lines to make eye-scanning easier:
            // it's indented to start right below the first hex code
            if (os->lines % LINES_PER_GROUP == 0 && os->lines > 0) {
                write_repeat(w, ' ', OFFSET_WIDTH);
#ifdef COMPACT_OUTPUT
                EMIT_CONST(w, " \x1b[38;5;245m");
#else
                EMIT_CONST(w, " \x1b[38;2;138;138;138m");
#endif
                ruler(w, os->linewidth);
                EMIT_CONST(w, "\x1b[0m\n");
            }
            os->lines++;

            // start next line with offset of its 1st item, also
            // changing the background color for the colored hex
            // code which will follow
            os->emit_offset(w, os->offset);
#ifdef COMPACT_OUTPUT
            EMIT_CONST(w, " \x1b[48;5;254m");
#else
            EMIT_CONST(w, " \x1b[48;2;228;228;228m");
#endif
        }

        // show the current byte `with style`
        write_styled_hex(w, b);
    }

    EMIT_CONST(w, "\x1b[0m");
    if (os->showtxt) {
        putc(' ', w);
        // keep the ASCII panel aligned, even when the line is a short one
        write_repeat(w, ' ', CELL_WIDTH * (os->linewidth - (end - i)));
        write_bytes(w, os->txt, os->txtlen);
    }
    putc('\n', w);
}

// emit_plain_line handles the details of showing a plain (unstyled) line out
// of the current input-buffer chunk: it shows the bytes from index `i` up to
// (excluding) index `end`, advancing the offset counter as it goes
void emit_plain_line(FILE* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show an empty line every few lines to make eye-scanning easier
            if (os->lines % LINES_PER_GROUP == 0 && os->lines > 0) {
                putc('\n', w);
            }
            os->lines++;

            // start next line with offset of its 1st item: hex offsets
            // have no ANSI styles, so they can be used as they are, while
            // base-10 offsets need their unstyled variant
            if (os->emit_offset == &write_hex_uint) {
                write_hex_uint(w, os->offset);
            } else {
                write_plain_uint(w, os->offset);
            }
            EMIT_CONST(w, " ");
        }

        // show the current byte
        write_hex(w, b);
        putc(' ', w);
    }

    if (os->showtxt) {
        putc(' ', w);
        // keep the ASCII panel aligned, even when the line is a short one
        write_repeat(w, ' ', CELL_WIDTH * (os->linewidth - (end - i)));
        write_bytes(w, os->txt, os->txtlen);
    }
    putc('\n', w);
}

// config has all the settings used to emit output
typedef struct config {
    // bytes_per_line determines the `width` of output lines
    size_t bytes_per_line;

    // emit_file_info is chosen to emit file-info with colors or plainly
    void (*emit_file_info)(FILE* w, const char* path, size_t nbytes);

    // emit_line is chosen to emit hex bytes with colors or plainly
    void (*emit_line)(FILE* w, size_t i, size_t end, output_state* os);

    // emit_offset is chosen to emit the offset at the start of each line
    void (*emit_offset)(FILE* w, size_t offset);
} config;

// handle_reader shows all bytes read from the source given as colored hex
// values, showing offsets and ASCII symbols on the sides of each output line;
// it stops when the input is over, or as soon as the output stream is in an
// error state, and flushes the output before returning
void handle_reader(FILE* w, FILE* src, config cfg) {
    // a line-width of 0 can't show anything, and would also cause
    // divisions by 0 later on
    if (cfg.bytes_per_line == 0) {
        return;
    }
    // limit line-width to the buffer's capacity
    if (cfg.bytes_per_line > BUFFER_CAPACITY) {
        cfg.bytes_per_line = BUFFER_CAPACITY;
    }

    const size_t one_line = cfg.bytes_per_line;
    const size_t two_lines = 2 * one_line;
    // ensure the effective buffer-size is a multiple of the line-width,
    // so every chunk but the last one always ends on a full line
    const size_t max = BUFFER_CAPACITY - BUFFER_CAPACITY % one_line;

    unsigned char buf[BUFFER_CAPACITY];
    // peek_ascii never scans beyond the input buffer, and never keeps more
    // bytes than it scans, so the same capacity is always enough
    unsigned char txt[BUFFER_CAPACITY];

    output_state os = {
        .buf = buf,
        .buflen = 0,
        .txt = txt,
        .txtlen = 0,
        .offset = 0,
        .linewidth = one_line,
        .lines = 0,
        .emit_offset = cfg.emit_offset,
        .showtxt = false,
    };

    // there's no point reading more input when the output can't take it
    while (!ferror(w)) {
        os.buflen = fread(buf, sizeof(buf[0]), max, src);
        if (os.buflen == 0) {
            // assume input is over when no bytes were read
            break;
        }

        for (size_t i = 0; i < os.buflen; i += one_line) {
            const size_t left = os.buflen - i;

            // remember all ASCII symbols in current pair of output lines
            peek_ascii(i, i + (left < two_lines ? left : two_lines), &os);

            // show current output line
            cfg.emit_line(w, i, i + (left < one_line ? left : one_line), &os);
        }
    }

    fflush(w);
}

// handle_stdin shows all bytes from the standard input, after a title line
static void handle_stdin(FILE* w, config cfg) {
    EMIT_CONST(w, "• <stdin>\n");
    EMIT_CONST(w, "\n");
    handle_reader(w, stdin, cfg);
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(FILE* w, const char* path, config cfg) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        // ensure currently-buffered/deferred output shows up right now: not
        // doing so may scramble results in the common case where stdout and
        // stderr are the same, thus confusing users
        putc('\n', w);
        fflush(w);

        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    // get the file size, showing 0 when it can't be determined
    struct stat st;
    size_t nbytes = 0;
    if (fstat(fileno(f), &st) == 0 && st.st_size > 0) {
        nbytes = (size_t)st.st_size;
    }

    // show output
    cfg.emit_file_info(w, path, nbytes);
    EMIT_CONST(w, "\n");
    handle_reader(w, f, cfg);

    fclose(f);
    return true;
}

// is_help_option simplifies control-flow for func run
bool is_help_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// is_plain_option simplifies control-flow for func run
bool is_plain_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-p") == 0 ||
        strcmp(s, "-plain") == 0 ||
        strcmp(s, "--p") == 0 ||
        strcmp(s, "--plain") == 0
    );
}

// is_hex_offsets_option simplifies control-flow for func run
bool is_hex_offsets_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-ho") == 0 ||
        strcmp(s, "--ho") == 0 ||
        strcmp(s, "-hexoffsets") == 0 ||
        strcmp(s, "--hexoffsets") == 0 ||
        strcmp(s, "-hex-offsets") == 0 ||
        strcmp(s, "--hex-offsets") == 0
    );
}

// run returns the number of errors, which is how many of the files given
// couldn't be opened; options only affect the inputs which come after them
int run(int argc, char** argv, FILE* w) {
    config cfg = {
        .bytes_per_line = 16,
        .emit_file_info = &emit_styled_file_info,
        .emit_line = &emit_styled_line,
        .emit_offset = &write_styled_uint,
    };

    // inputs counts both named files and uses of the standard input
    size_t inputs = 0;
    size_t errors = 0;

    // handle all filenames/options given, unless the output stops working
    for (int i = 1; i < argc && !ferror(w); i++) {
        const char* arg = argv[i];

        if (is_help_option(arg)) {
            // help option quits the app right away
            fputs(info, stderr);
            return 0;
        }

        if (is_plain_option(arg)) {
            cfg.emit_line = &emit_plain_line;
            cfg.emit_file_info = &emit_plain_file_info;
            continue;
        }

        if (is_hex_offsets_option(arg)) {
            cfg.emit_offset = &write_hex_uint;
            continue;
        }

        if (inputs > 0) {
            // put an empty line between adjacent hex outputs
            putc('\n', w);
        }
        inputs++;

        // a `-` filename stands for the standard input: it counts as an
        // input, so stdin isn't also used as the fallback later
        if (strcmp(arg, "-") == 0) {
            handle_stdin(w, cfg);
            continue;
        }

        if (!handle_file(w, arg, cfg)) {
            errors++;
        }
    }

    // no filenames means use stdin as the only input
    if (inputs == 0) {
        handle_stdin(w, cfg);
    }

    return (int)errors;
}

int main(int argc, char** argv) {
#ifdef _WIN32
    _setmode(_fileno(stdin), _O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    _setmode(_fileno(stdout), _O_BINARY);
    _setmode(_fileno(stderr), _O_BINARY);
#endif

    // enable full buffering for stdout: the size is given explicitly,
    // since a size of 0 isn't valid for all C runtimes
    setvbuf(stdout, NULL, _IOFBF, STDOUT_BUFFER_SIZE);

    return run(argc, argv, stdout) == 0 ? 0 : 1;
}