// File: nh.c
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./nh ./nh.c

Building with COMPACT_OUTPUT defined makes `nh` output many fewer bytes, at
the cost of using arguably worse colors. You can do that by running

cc -Wall -s -O2 -D COMPACT_OUTPUT -o ./nh ./nh.c
*/

#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#ifdef _WIN32
#include <io.h>
#include <windows.h>
#endif

// #define COMPACT_OUTPUT

// info is the multi-line help message
const char* info = ""
"nh [options...] [filenames...]\n"
"\n"
"Nice Hexadecimal is a simple hexadecimal (base-16) viewer to inspect bytes\n"
"from files or standard input.\n"
"\n"
"Each line shows the starting offset for the bytes shown, 16 of the bytes\n"
"themselves in base-16 notation, and any ASCII codes when the byte values\n"
"are in the typical ASCII range.\n"
"\n"
"The base-16 codes are color-coded, with most bytes shown in gray, while\n"
"all-1 and all-0 bytes are shown in orange and blue respectively.\n"
"\n"
"All-0 bytes are the commonest kind in most binary file types and, along\n"
"with all-1 bytes are also a special case worth noticing when exploring\n"
"binary data, so it makes sense for them to stand out right away.\n"
"\n"
"\n"
"Options\n"
"\n"
" -h, --h show this help message\n"
" -help, --help aliases for option -h\n"
"\n"
" -p, --p plain-text output, without ANSI styles\n"
" -plain, --plain aliases for option -p\n"
"\n"
" -ho, --ho show hex (base-16) offsets, instead of base-10 ones\n"
" -hexoffsets, --hexoffsets aliases for option -ho\n"
" -hex-offsets, --hex-offsets aliases for option -ho\n"
"";

#ifdef COMPACT_OUTPUT
#define OUTPUT_FOR_00 "\x1b[34m00 "
#define OUTPUT_FOR_FF "\x1b[33mff "
#define NORMAL_HEX_STYLE "\x1b[37m"
#define ASCII_HEX_STYLE "\x1b[32m"
#define ASCII_BYTE_STYLE "\x1b[30m"
#define ASCII_WS_STYLE "\x1b[36m"
#else
// 256-color near-equivalents of the true-color styles below, kept for reference
// #define OUTPUT_FOR_00 "\x1b[38;5;111m00 "
// #define OUTPUT_FOR_FF "\x1b[38;5;209mff "
// #define NORMAL_HEX_STYLE "\x1b[38;5;246m"
// #define ASCII_HEX_STYLE "\x1b[38;5;72m"
// #define ASCII_BYTE_STYLE "\x1b[38;5;239m"
#define OUTPUT_FOR_00 "\x1b[38;2;135;175;255m00 "
#define OUTPUT_FOR_FF "\x1b[38;2;255;135;95mff "
#define NORMAL_HEX_STYLE "\x1b[38;2;148;148;148m"
#define ASCII_HEX_STYLE "\x1b[38;2;102;175;135m"
#define ASCII_BYTE_STYLE "\x1b[38;2;78;78;78m"
#define ASCII_WS_STYLE "\x1b[38;2;6;152;154m"
#endif

// layout constants: each line starts with an offset padded to OFFSET_WIDTH
// columns, then OFFSET_GAP, then one 3-column cell per byte (2 hex digits
// followed by either a space or the byte's ASCII symbol)
enum {
    // OFFSET_WIDTH is the minimum number of columns offsets are padded to
    OFFSET_WIDTH = 8,

    // READ_BUFFER_CAPACITY is how many bytes are read from inputs at a time
    READ_BUFFER_CAPACITY = 32 * 1024,

    // DEFAULT_BYTES_PER_LINE is how many bytes each output line shows
    DEFAULT_BYTES_PER_LINE = 16,

    // LINES_PER_GROUP is how many lines go by between `breather` lines
    LINES_PER_GROUP = 5
};

// OFFSET_GAP is what separates a line's offset from its hex codes
#define OFFSET_GAP " "

// RULER_INDENT is as wide as a padded offset followed by OFFSET_GAP, so that
// ruler marks line up with the hex codes on the lines around them
#define RULER_INDENT "        " OFFSET_GAP

// RULER_MARK covers 4 byte-cells (12 columns): its symbol sits on the last
// column of every 4th cell
#define RULER_MARK "           ·"

// MISSING_BYTE_PAD is as wide as the cell of a byte, and stands for each
// byte missing from an input's last line, when that comes up short
#define MISSING_BYTE_PAD "   "

// EMIT_CONST abstracts emitting string constants without their final null
// byte; its 2nd argument must be a string literal, since it relies on sizeof
#define EMIT_CONST(w, x) fwrite((x), sizeof(x) - 1, 1, (w))

// hex_digits turns values from 0 to 15 into their lowercase base-16 symbols
static const char hex_digits[] = "0123456789abcdef";

// write_bytes emits the first len bytes from src; it's `static inline`, as a
// plain `inline` definition has no external definition in C99 and later,
// which can make unoptimized builds fail to link
static inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    if (len > 0) {
        fwrite(src, 1, len, w);
    }
}

// write_padding emits the byte given as many times as asked
static void write_padding(FILE* w, unsigned char pad, size_t count) {
    for (size_t i = 0; i < count; i++) {
        putc(pad, w);
    }
}

// fill_digits puts the base-10 digits of the number given right-aligned at
// the end of the buffer given, and returns how many digits that took, which
// is always at least 1, since 0 results in a single `0` digit; the buffer's
// capacity must be at least 1, and is never exceeded
static size_t fill_digits(unsigned char* buf, size_t cap, size_t n) {
    size_t digits = 0;
    do {
        buf[cap - 1 - digits] = (unsigned char)((n % 10) + '0');
        digits++;
        n /= 10;
    } while (n > 0 && digits < cap);
    return digits;
}

// write_hex is faster than calling fprintf(w, "%02x", b): this matters
// because it's called for every input byte
void write_hex(FILE* w, unsigned char b) {
    putc(hex_digits[b >> 4], w);
    putc(hex_digits[b & 0x0f], w);
}

// write_styled_hex emits an ANSI color-coded hexadecimal representation
// of the byte given: the result always takes 3 columns when shown
void write_styled_hex(FILE* w, unsigned char b) {
    // all-bits-off is almost always noteworthy
    if (b == 0) {
        EMIT_CONST(w, OUTPUT_FOR_00);
        return;
    }

    // all-bits-on is often noteworthy
    if (b == 0xff) {
        EMIT_CONST(w, OUTPUT_FOR_FF);
        return;
    }

    // regular ASCII display symbols, which show right after their hex codes
    if (33 <= b && b <= 126) {
        EMIT_CONST(w, ASCII_HEX_STYLE);
        write_hex(w, b);
        EMIT_CONST(w, ASCII_BYTE_STYLE);
        putc(b, w);
        return;
    }

    // ASCII whitespace
    if (b == ' ' || b == '\n' || b == '\t' || b == '\r') {
        EMIT_CONST(w, ASCII_WS_STYLE);
        write_hex(w, b);
        EMIT_CONST(w, ASCII_BYTE_STYLE);
        putc(' ', w);
        return;
    }

    // ASCII control values, and other bytes beyond displayable ASCII
    EMIT_CONST(w, NORMAL_HEX_STYLE);
    write_hex(w, b);
    putc(' ', w);
}

// ruler emits a ruler-like string of spaced-out symbols, one for every full
// group of 4 bytes which fits in a line; lines narrower than that get none
void ruler(FILE* w, size_t bytes_per_line) {
    const size_t gap = 4;
    for (size_t n = gap; n <= bytes_per_line; n += gap) {
        EMIT_CONST(w, RULER_MARK);
    }
}

// write_commas_uint shows a number by separating 3-digits groups with commas
void write_commas_uint(FILE* w, size_t n) {
    // 20 is the most digits unsigned 64-bit ints can ever need
    unsigned char buf[24];
    size_t digits = fill_digits(buf, sizeof(buf), n);

    // emit the leading digits, which may be fewer than 3: when the digit
    // count is a multiple of 3, the first group is a full one, which also
    // avoids having a comma before the first digit
    size_t leading = digits % 3;
    if (leading == 0) {
        leading = 3;
    }
    const unsigned char* start = buf + sizeof(buf) - digits;
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // emit all remaining digits in groups of 3, each after a comma
    for (; digits > 0; start += 3, digits -= 3) {
        putc(',', w);
        write_bytes(w, start, 3);
    }
}

// output_state ties all values representing the current state shared across
// all functions involved in interpreting the input-buffer and showing its
// bytes and ASCII values
typedef struct output_state {
    // the whole input-buffer and its currently-used length in bytes
    unsigned char* buf;
    size_t buflen;

    // the ASCII-text buffer and its currently-used length in bytes
    unsigned char* txt;
    size_t txtlen;

    // offset is the byte counter, shown at the start of each line
    size_t offset;

    // linewidth is how many bytes each line can show at most
    size_t linewidth;

    // lines is the line counter, which is used to provide periodic
    // breather lines, to make eye-scanning big output blobs easier
    size_t lines;

    // emit_offset is chosen to emit the offset at the start of each line
    void (*emit_offset)(FILE* w, size_t offset);

    // showtxt is a hint on whether it's sensible to show the ASCII-text
    // buffer for the current line
    bool showtxt;
} output_state;

// peek_ascii looks 2 lines ahead in the buffer to get all ASCII-like runs
// of bytes, which are later meant to show on the side panel; it looks at
// bytes from index i up to index end (excluded), updating both the text
// buffer and the showtxt hint; the text never needs more bytes than the
// range looked at, since separators only come after non-symbol bytes
void peek_ascii(size_t i, size_t end, output_state* os) {
    unsigned char prev = 0;
    os->txtlen = 0;
    // start from a known value: not doing so leaves the hint either
    // uninitialized or stale, when no symbols are found
    os->showtxt = false;

    for (size_t j = i; j < end; j++) {
        const unsigned char b = os->buf[j];

        if (' ' < b && b <= '~') {
            const bool first = os->txtlen == 0;
            if (first) {
                // show ASCII panel, if the symbols start on the current line
                os->showtxt = j - i < os->linewidth;
            }

            // add a space before the symbol, when it's the start of a `word`
            if ((prev <= ' ' || prev > '~') && !first) {
                os->txt[os->txtlen] = ' ';
                os->txtlen++;
            }

            // add the symbol itself
            os->txt[os->txtlen] = b;
            os->txtlen++;
        }

        prev = b;
    }
}

// write_plain_uint is the unstyled counterpart of func write_styled_uint: it
// emits the number given in base-10, left-padded with spaces up to 8 chars
void write_plain_uint(FILE* w, size_t n) {
    // 20 is the most digits unsigned 64-bit ints can ever need
    unsigned char buf[24];
    const size_t digits = fill_digits(buf, sizeof(buf), n);

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_padding(w, ' ', OFFSET_WIDTH - digits);
    }

    // emit all digits
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_hex_uint emits the number given in base-16, using an even number of
// digits, and left-padding the result with zeros up to 8 chars
void write_hex_uint(FILE* w, size_t n) {
    // 16 is the most hex digits unsigned 64-bit ints can ever need
    unsigned char buf[24];
    size_t digits = 0;
    for (; n > 0; digits += 2, n >>= 8) {
        const unsigned char b = (unsigned char)(n & 0xff);
        buf[sizeof(buf) - 2 - digits] = (unsigned char)hex_digits[b >> 4];
        buf[sizeof(buf) - 1 - digits] = (unsigned char)hex_digits[b & 0x0f];
    }

    // left-pad the coming digits up to 8 chars: zero has no digits at all
    // by this point, so it shows as padding only
    if (digits < OFFSET_WIDTH) {
        write_padding(w, '0', OFFSET_WIDTH - digits);
    }

    // emit all digits
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_styled_uint is a quick way to emit the offset-counter showing at the
// start of each line; it assumes 8-item left-padding of values, unless the
// numbers are too big for that
void write_styled_uint(FILE* w, size_t n) {
    // 20 is the most digits unsigned 64-bit ints can ever need
    unsigned char buf[24];
    size_t digits = fill_digits(buf, sizeof(buf), n);

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_padding(w, ' ', OFFSET_WIDTH - digits);
    }

    // emit the leading digits, which may be fewer than 3, or even none
    const size_t leading = digits % 3;
    const unsigned char* start = buf + sizeof(buf) - digits;
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // emit all remaining digits in groups of 3, alternating styles, so
    // adjacent groups are easy to tell apart without using separators
    bool styled = leading != 0;
    for (; digits > 0; start += 3, digits -= 3, styled = !styled) {
        if (!styled) {
            write_bytes(w, start, 3);
            continue;
        }

#ifdef COMPACT_OUTPUT
        EMIT_CONST(w, "\x1b[38;5;248m");
#else
        EMIT_CONST(w, "\x1b[38;2;168;168;168m");
#endif
        write_bytes(w, start, 3);
        EMIT_CONST(w, "\x1b[0m");
    }
}

// emit_styled_file_info emits an ANSI-styled line showing a filename and the
// file's size in bytes
void emit_styled_file_info(FILE* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
#ifdef COMPACT_OUTPUT
    EMIT_CONST(w, " \x1b[38;5;245m(");
#else
    EMIT_CONST(w, " \x1b[38;2;138;138;138m(");
#endif
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\x1b[0m\n");
}

// emit_plain_file_info is the unstyled counterpart of func emit_styled_file_info
void emit_plain_file_info(FILE* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " (");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\n");
}

// emit_styled_line handles the details of showing a styled line out of the
// current input-buffer chunk: the bytes shown are the ones from index i up
// to index end (excluded), and index i is expected to be a multiple of the
// line-width
void emit_styled_line(FILE* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show a ruler every few lines to make eye-scanning easier
            if (os->lines % LINES_PER_GROUP == 0 && os->lines > 0) {
#ifdef COMPACT_OUTPUT
                EMIT_CONST(w, RULER_INDENT "\x1b[38;5;245m");
#else
                EMIT_CONST(w, RULER_INDENT "\x1b[38;2;138;138;138m");
#endif
                ruler(w, os->linewidth);
                EMIT_CONST(w, "\x1b[0m\n");
            }
            os->lines++;

            // start next line with offset of its 1st item, also
            // changing the background color for the colored hex
            // code which will follow
            if (os->emit_offset != NULL) {
                os->emit_offset(w, os->offset);
            } else {
                write_styled_uint(w, os->offset);
            }
#ifdef COMPACT_OUTPUT
            EMIT_CONST(w, OFFSET_GAP "\x1b[48;5;254m");
#else
            EMIT_CONST(w, OFFSET_GAP "\x1b[48;2;228;228;228m");
#endif
        }

        // show the current byte `with style`
        write_styled_hex(w, b);
    }

    if (!os->showtxt) {
        EMIT_CONST(w, "\x1b[0m\n");
        return;
    }

    EMIT_CONST(w, "\x1b[0m ");
    // keep the ASCII panel aligned, even when the line comes up short
    for (size_t j = end - i; j < os->linewidth; j++) {
        EMIT_CONST(w, MISSING_BYTE_PAD);
    }
    write_bytes(w, os->txt, os->txtlen);
    putc('\n', w);
}

// emit_plain_line handles the details of showing a plain (unstyled) line out
// of the current input-buffer chunk: the bytes shown are the ones from index
// i up to index end (excluded), and index i is expected to be a multiple of
// the line-width
void emit_plain_line(FILE* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show an empty line every few lines to make eye-scanning easier
            if (os->lines % LINES_PER_GROUP == 0 && os->lines > 0) {
                putc('\n', w);
            }
            os->lines++;

            // start next line with offset of its 1st item: styled offsets
            // make no sense in plain output, while all other choices of
            // offset-emitters are respected, such as the base-16 one
            if (os->emit_offset != NULL && os->emit_offset != &write_styled_uint) {
                os->emit_offset(w, os->offset);
            } else {
                write_plain_uint(w, os->offset);
            }
            EMIT_CONST(w, OFFSET_GAP);
        }

        // show the current byte, followed by a space
        write_hex(w, b);
        putc(' ', w);
    }

    if (!os->showtxt) {
        putc('\n', w);
        return;
    }

    EMIT_CONST(w, " ");
    // keep the ASCII panel aligned, even when the line comes up short
    for (size_t j = end - i; j < os->linewidth; j++) {
        EMIT_CONST(w, MISSING_BYTE_PAD);
    }
    write_bytes(w, os->txt, os->txtlen);
    putc('\n', w);
}

// config has all the settings used to emit output
typedef struct config {
    // bytes_per_line determines the `width` of output lines
    size_t bytes_per_line;

    // emit_file_info is chosen to emit file-info with colors or plainly
    void (*emit_file_info)(FILE* w, const char* path, size_t nbytes);

    // emit_line is chosen to emit hex bytes with colors or plainly
    void (*emit_line)(FILE* w, size_t i, size_t end, output_state* os);

    // emit_offset is chosen to emit the offset at the start of each line
    void (*emit_offset)(FILE* w, size_t offset);
} config;

// handle_reader shows all bytes read from the source given as colored hex
// values, showing offsets and ASCII symbols on the sides of each output line;
// it stops early when writing to the output fails, and always flushes the
// output before returning
void handle_reader(FILE* w, FILE* src, config cfg) {
    // avoid dividing by 0 later on
    if (cfg.bytes_per_line < 1) {
        cfg.bytes_per_line = DEFAULT_BYTES_PER_LINE;
    }
    // limit line-width to the buffer's capacity
    if (cfg.bytes_per_line > READ_BUFFER_CAPACITY) {
        cfg.bytes_per_line = READ_BUFFER_CAPACITY;
    }

    const size_t one_line = cfg.bytes_per_line;
    const size_t two_lines = 2 * one_line;

    // both buffers have constant sizes, to avoid variable-length arrays: the
    // text buffer never needs more bytes than the 2 lines it's made from
    unsigned char txt[2 * READ_BUFFER_CAPACITY];
    unsigned char buf[READ_BUFFER_CAPACITY];

    // ensure the effective buffer-size is a multiple of the line-width
    const size_t max = READ_BUFFER_CAPACITY - READ_BUFFER_CAPACITY % one_line;

    output_state os;
    os.buf = buf;
    os.buflen = 0;
    os.txt = txt;
    os.txtlen = 0;
    os.offset = 0;
    os.linewidth = one_line;
    os.lines = 0;
    os.emit_offset = cfg.emit_offset;
    os.showtxt = false;

    // keep going until the input is over, or the output stops working
    while (!ferror(w)) {
        os.buflen = fread(buf, sizeof(buf[0]), max, src);
        if (os.buflen < 1) {
            // assume input is over when no bytes were read
            break;
        }

        for (size_t i = 0; i < os.buflen; i += one_line) {
            size_t end;

            // remember all ASCII symbols in current pair of output lines
            end = i + two_lines < os.buflen ? i + two_lines : os.buflen;
            peek_ascii(i, end, &os);

            // show current output line
            end = i + one_line < os.buflen ? i + one_line : os.buflen;
            cfg.emit_line(w, i, end, &os);
        }
    }

    fflush(w);
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(FILE* w, const char* path, config cfg) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        // ensure currently-buffered/deferred output shows up right now: not
        // doing so may scramble results in the common case where stdout and
        // stderr are the same, thus confusing users
        putc('\n', w);
        fflush(w);

        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    // get the file size, showing it as 0 when it can't be figured out
    size_t nbytes = 0;
    struct stat st;
    if (fstat(fileno(f), &st) == 0 && st.st_size > 0) {
        nbytes = (size_t)st.st_size;
    }

    // show output
    cfg.emit_file_info(w, path, nbytes);
    EMIT_CONST(w, "\n");
    handle_reader(w, f, cfg);

    fclose(f);
    return true;
}

// is_help_option simplifies control-flow for func run
bool is_help_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
    );
}

// is_plain_option simplifies control-flow for func run
bool is_plain_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-p") == 0 || strcmp(s, "-plain") == 0 ||
        strcmp(s, "--p") == 0 || strcmp(s, "--plain") == 0
    );
}

// is_hex_offsets_option simplifies control-flow for func run
bool is_hex_offsets_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-ho") == 0 || strcmp(s, "--ho") == 0 ||
        strcmp(s, "-hexoffsets") == 0 || strcmp(s, "--hexoffsets") == 0 ||
        strcmp(s, "-hex-offsets") == 0 || strcmp(s, "--hex-offsets") == 0
    );
}

// run handles all command-line arguments in order, which means options only
// affect the inputs coming after them; it returns the number of errors
int run(int argc, char** argv, FILE* w) {
    config cfg;
    cfg.bytes_per_line = DEFAULT_BYTES_PER_LINE;
    cfg.emit_line = &emit_styled_line;
    cfg.emit_file_info = &emit_styled_file_info;
    cfg.emit_offset = &write_styled_uint;

    // inputs counts all inputs handled so far, the standard input included
    size_t inputs = 0;
    int errors = 0;

    // handle all filenames/options given, unless the output stops working
    for (int i = 1; i < argc && !ferror(w); i++) {
        const char* arg = argv[i];

        if (is_help_option(arg)) {
            // help option quits the app right away
            fputs(info, stderr);
            return 0;
        }

        if (is_plain_option(arg)) {
            cfg.emit_line = &emit_plain_line;
            cfg.emit_file_info = &emit_plain_file_info;
            // keep base-16 offsets, if those were asked for before
            if (cfg.emit_offset == &write_styled_uint) {
                cfg.emit_offset = &write_plain_uint;
            }
            continue;
        }

        if (is_hex_offsets_option(arg)) {
            cfg.emit_offset = &write_hex_uint;
            continue;
        }

        if (inputs > 0) {
            // put an empty line between adjacent hex outputs
            putc('\n', w);
        }
        inputs++;

        // a `-` filename stands for the standard input
        if (arg[0] == '-' && arg[1] == 0) {
            EMIT_CONST(w, "• <stdin>\n");
            EMIT_CONST(w, "\n");
            handle_reader(w, stdin, cfg);
            continue;
        }

        if (!handle_file(w, arg, cfg)) {
            errors++;
        }
    }

    // no filenames means use stdin as the only input
    if (inputs == 0) {
        EMIT_CONST(w, "• <stdin>\n");
        EMIT_CONST(w, "\n");
        handle_reader(w, stdin, cfg);
    }

    return errors;
}

int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // enable full buffering for stdout: the size given is explicit, since
    // some C runtimes reject a size of 0
    setvbuf(stdout, NULL, _IOFBF, BUFSIZ);

    return run(argc, argv, stdout) == 0 ? 0 : 1;
}