// File: nh.c
/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./nh ./nh.c

Building with COMPACT_OUTPUT defined makes `nh` output many fewer bytes, at
the cost of using arguably worse colors.
*/

#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#ifdef _WIN32
#include <windows.h>
#endif

// #define COMPACT_OUTPUT

enum {
    // IO_BUFFER_SIZE is the capacity of both the input and output buffers
    IO_BUFFER_SIZE = 48 * 1024,

    // DEFAULT_BYTES_PER_LINE is how many input bytes each output line shows
    DEFAULT_BYTES_PER_LINE = 16,

    // OFFSET_WIDTH is the least number of columns used to show byte offsets
    OFFSET_WIDTH = 8,

    // UINT_BUFFER_SIZE fits the decimal digits of any 64-bit unsigned value,
    // which can need 20 digits at most
    UINT_BUFFER_SIZE = 24
};

// info is the multi-line help message
// NOTE(review): the column spacing inside the option lines looks collapsed;
// confirm the intended alignment before changing these literals
const char* info = ""
"nh [options...] [filenames...]\n"
"\n"
"Nice Hexadecimal is a simple hexadecimal (base-16) viewer to inspect bytes\n"
"from files or standard input.\n"
"\n"
"Each line shows the starting offset for the bytes shown, 16 of the bytes\n"
"themselves in base-16 notation, and any ASCII codes when the byte values\n"
"are in the typical ASCII range.\n"
"\n"
"The base-16 codes are color-coded, with most bytes shown in gray, while\n"
"all-1 and all-0 bytes are shown in orange and blue respectively.\n"
"\n"
"All-0 bytes are the commonest kind in most binary file types and, along\n"
"with all-1 bytes are also a special case worth noticing when exploring\n"
"binary data, so it makes sense for them to stand out right away.\n"
"\n"
"\n"
"Options\n"
"\n"
" -h, --h show this help message\n"
" -help, --help aliases for option -h\n"
"\n"
" -p, --p plain-text output, without ANSI styles\n"
" -plain, --plain aliases for option -p\n"
"";

// each styled byte takes 3 visible columns: the OUTPUT_FOR_* constants
// already include the hex digits and the trailing space for their byte
#ifdef COMPACT_OUTPUT
#define OUTPUT_FOR_00 "\x1b[34m00 "
#define OUTPUT_FOR_FF "\x1b[33mff "
#define NORMAL_HEX_STYLE "\x1b[37m"
#define ASCII_HEX_STYLE "\x1b[32m"
#define ASCII_BYTE_STYLE "\x1b[30m"
#else
#define OUTPUT_FOR_00 "\x1b[38;5;111m00 "
#define OUTPUT_FOR_FF "\x1b[38;5;209mff "
#define NORMAL_HEX_STYLE "\x1b[38;5;246m"
#define ASCII_HEX_STYLE "\x1b[38;5;72m"
#define ASCII_BYTE_STYLE "\x1b[38;5;239m"
#endif

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination
    bool done;
} bufwriter;

// init_bufwriter is the constructor for type bufwriter: the buffer given is
// borrowed, not copied, so it must outlive the writer
void init_bufwriter(bufwriter* w, FILE* dst, unsigned char* buf, size_t cap) {
    w->buf = buf;
    w->len = 0;
    w->cap = cap;
    w->out = dst;
    w->done = false;
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; a failed write marks the writer as done
void flush(bufwriter* w) {
    if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
        w->done = true;
    }
    w->len = 0;
}

// write_bytes does as it says, minimizing the number of calls to fwrite
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    // comparing against the room left can't overflow, unlike adding lengths
    if (len < w->cap - w->len) {
        // all bytes fit into buffer
        memcpy(w->buf + w->len, src, len);
        w->len += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);

    // emit all chunks striding beyond/at the buffer's capacity
    for (; len >= w->cap; src += w->cap, len -= w->cap) {
        if (fwrite(src, w->cap, 1, w->out) < 1) {
            w->done = true;
            return;
        }
    }

    // now all, if any, remaining bytes will fit into the buffer, which the
    // flush above left empty
    memcpy(w->buf, src, len);
    w->len += len;
}

// write_byte does as it says, flushing first when the buffer is full
void write_byte(bufwriter* w, unsigned char b) {
    if (w->len >= w->cap) {
        flush(w);
    }

    w->buf[w->len] = b;
    w->len++;
}

// EMIT_CONST abstracts a common use-case of the bufwriter, which is
// emitting string constants without their final null byte: its 2nd argument
// must be a string literal (or char array), since it relies on sizeof
#define EMIT_CONST(w, x) \
    write_bytes((w), (const unsigned char*)(x), sizeof(x) - 1)

// write_hex is faster than calling fprintf(w, "%02x", b): this matters
// because it's called for every input byte
void write_hex(bufwriter* w, unsigned char b) {
    const char* hex_digits = "0123456789abcdef";
    write_byte(w, hex_digits[b >> 4]);
    write_byte(w, hex_digits[b & 0x0f]);
}

// write_styled_hex emits an ANSI color-coded hexadecimal representation
// of the byte given, always taking 3 visible columns
void write_styled_hex(bufwriter* w, unsigned char b) {
    // all-bits-off is almost always noteworthy
    if (b == 0) {
        EMIT_CONST(w, OUTPUT_FOR_00);
        return;
    }

    // all-bits-on is often noteworthy
    if (b == 0xff) {
        EMIT_CONST(w, OUTPUT_FOR_FF);
        return;
    }

    // regular ASCII display symbols: the symbol itself takes the place of
    // the space which normally follows the 2 hex digits
    if (32 <= b && b <= 126) {
        EMIT_CONST(w, ASCII_HEX_STYLE);
        write_hex(w, b);
        EMIT_CONST(w, ASCII_BYTE_STYLE);
        write_byte(w, b);
        return;
    }

    // ASCII control values, and other bytes beyond displayable ASCII
    EMIT_CONST(w, NORMAL_HEX_STYLE);
    write_hex(w, b);
    write_byte(w, ' ');
}

// ruler emits a ruler-like string of spaced-out symbols, one symbol for
// every 4 bytes a line can show; lines narrower than that get no ruler
// NOTE(review): the spacing inside these literals looks collapsed, since 4
// byte cells span 12 columns; confirm the intended widths
void ruler(bufwriter* w, size_t bytes_per_line) {
    const size_t gap = 4;
    if (bytes_per_line < gap) {
        return;
    }

    EMIT_CONST(w, " ·");
    for (size_t n = bytes_per_line - gap; n >= gap; n -= gap) {
        EMIT_CONST(w, " ·");
    }
}

// format_uint writes the decimal digits of the number given right-aligned
// at the end of buf, which must be UINT_BUFFER_SIZE bytes long; the result
// is the digit count, which is at least 1, since 0 is shown as a lone `0`
static size_t format_uint(unsigned char* buf, size_t n) {
    size_t digits = 0;
    do {
        digits++;
        buf[UINT_BUFFER_SIZE - digits] = (unsigned char)('0' + n % 10);
        n /= 10;
    } while (n > 0);
    return digits;
}

// write_offset_padding emits the spaces needed to left-pad a number with
// the digit count given up to OFFSET_WIDTH columns; wider numbers get none
static void write_offset_padding(bufwriter* w, size_t digits) {
    for (size_t i = digits; i < OFFSET_WIDTH; i++) {
        write_byte(w, ' ');
    }
}

// write_commas_uint shows a number by separating 3-digits groups with commas
void write_commas_uint(bufwriter* w, size_t n) {
    unsigned char buf[UINT_BUFFER_SIZE];
    size_t digits = format_uint(buf, n);
    const unsigned char* start = buf + sizeof(buf) - digits;

    // now emit the leading digits, which may not come in 3
    size_t leading = digits % 3;
    if (leading == 0) {
        // avoid having a comma before the first digit: there are at least
        // 3 digits here, since the digit count is a positive multiple of 3
        leading = 3;
    }
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // now emit all remaining digits in comma-preceded groups of 3
    for (; digits > 0; start += 3, digits -= 3) {
        write_byte(w, ',');
        write_bytes(w, start, 3);
    }
}

// output_state ties all values representing the current state shared across
// all functions involved in interpreting the input-buffer and showing its
// bytes and ASCII values
typedef struct output_state {
    // the whole input-buffer and its currently-used length in bytes
    unsigned char* buf;
    size_t buflen;

    // the ASCII-text buffer and its currently-used length in bytes
    unsigned char* txt;
    size_t txtlen;

    // offset is the byte counter, shown at the start of each line
    size_t offset;

    // linewidth is how many bytes each line can show at most
    size_t linewidth;

    // lines is the line counter, which is used to provide periodic
    // breather lines, to make eye-scanning big output blobs easier
    size_t lines;

    // showtxt is a hint on whether it's sensible to show the ASCII-text
    // buffer for the current line
    bool showtxt;
} output_state;

// peek_ascii scans input-buffer bytes from index i up to (excluding) index
// end, which callers set up to 2 lines ahead, to get all ASCII-like runs of
// bytes, which are later meant to show on the side panel; runs are joined
// by single spaces, so the text never outgrows the number of bytes scanned
void peek_ascii(size_t i, size_t end, output_state* os) {
    unsigned char prev = 0;
    os->txtlen = 0;
    // start from a known value: without this, lines with no symbols in
    // sight would reuse the hint from a previous line, or read a value
    // which was never set
    os->showtxt = false;

    for (size_t j = i; j < end; j++) {
        const unsigned char b = os->buf[j];

        if (' ' < b && b <= '~') {
            const bool first = os->txtlen == 0;
            if (first) {
                // show ASCII panel, if the symbols start on the current line
                os->showtxt = j - i < os->linewidth;
            }

            // add a space before the symbol, when it's the start of a `word`
            if ((prev <= ' ' || prev > '~') && !first) {
                os->txt[os->txtlen] = ' ';
                os->txtlen++;
            }

            // add the symbol itself
            os->txt[os->txtlen] = b;
            os->txtlen++;
        }

        prev = b;
    }
}

// write_plain_uint is the unstyled counterpart of func write_styled_uint:
// it left-pads numbers with spaces up to 8 columns, zero included
void write_plain_uint(bufwriter* w, size_t n) {
    unsigned char buf[UINT_BUFFER_SIZE];
    const size_t digits = format_uint(buf, n);

    write_offset_padding(w, digits);
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_styled_uint is a quick way to emit the offset-counter showing at the
// start of each line; it assumes 8-item left-padding of values, unless the
// numbers are too big for that
void write_styled_uint(bufwriter* w, size_t n) {
    unsigned char buf[UINT_BUFFER_SIZE];
    size_t digits = format_uint(buf, n);
    const unsigned char* start = buf + sizeof(buf) - digits;

    write_offset_padding(w, digits);

    // now emit the leading digits, which may be fewer than 3
    const size_t leading = digits % 3;
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // now emit all remaining digits in groups of 3, alternating styles, so
    // adjacent groups are easy to tell apart without using separators
    bool styled = leading != 0;
    for (; digits > 0; start += 3, digits -= 3, styled = !styled) {
        if (styled) {
            EMIT_CONST(w, "\x1b[38;5;243m");
            write_bytes(w, start, 3);
            EMIT_CONST(w, "\x1b[0m");
        } else {
            write_bytes(w, start, 3);
        }
    }
}

// emit_styled_file_info emits an ANSI-styled line showing a filename and the
// file's size in bytes
void emit_styled_file_info(bufwriter* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " \x1b[38;5;245m(");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\x1b[0m\n");
}

// emit_plain_file_info is the unstyled counterpart of func
// emit_styled_file_info
void emit_plain_file_info(bufwriter* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " (");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\n");
}

// emit_styled_line handles the details of showing a styled line out of the
// current input-buffer chunk: i and end are the buffer indices where the
// line starts and (exclusively) ends, and i is expected to be a multiple of
// the line width
void emit_styled_line(bufwriter* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show a ruler every few lines to make eye-scanning easier
            if (os->lines % 5 == 0 && os->lines > 0) {
                EMIT_CONST(w, " \x1b[38;5;245m");
                ruler(w, os->linewidth);
                EMIT_CONST(w, "\x1b[0m\n");
            }
            os->lines++;

            // start next line with offset of its 1st item, also
            // changing the background color for the colored hex
            // code which will follow
            write_styled_uint(w, os->offset);
            EMIT_CONST(w, " \x1b[48;5;254m");
        }

        // show the current byte `with style`
        write_styled_hex(w, b);
    }

    if (!os->showtxt) {
        EMIT_CONST(w, "\x1b[0m\n");
        return;
    }

    EMIT_CONST(w, "\x1b[0m ");
    // keep the ASCII panel aligned on short lines: each missing byte would
    // have taken 3 columns
    for (size_t j = end - i; j < os->linewidth; j++) {
        EMIT_CONST(w, "   ");
    }
    write_bytes(w, os->txt, os->txtlen);
    write_byte(w, '\n');
}

// emit_plain_line handles the details of showing a plain (unstyled) line out
// of the current input-buffer chunk: i and end are the buffer indices where
// the line starts and (exclusively) ends, and i is expected to be a multiple
// of the line width
void emit_plain_line(bufwriter* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        const unsigned char b = os->buf[j];

        if (j % os->linewidth == 0) {
            // show an empty line, instead of a ruler, every few lines to
            // make eye-scanning easier
            if (os->lines % 5 == 0 && os->lines > 0) {
                write_byte(w, '\n');
            }
            os->lines++;

            // start next line with offset of its 1st item
            write_plain_uint(w, os->offset);
            EMIT_CONST(w, " ");
        }

        // show the current byte, which takes 3 columns
        write_hex(w, b);
        write_byte(w, ' ');
    }

    if (!os->showtxt) {
        write_byte(w, '\n');
        return;
    }

    EMIT_CONST(w, " ");
    // keep the ASCII panel aligned on short lines: each missing byte would
    // have taken 3 columns
    for (size_t j = end - i; j < os->linewidth; j++) {
        EMIT_CONST(w, "   ");
    }
    write_bytes(w, os->txt, os->txtlen);
    write_byte(w, '\n');
}

// config has all the settings used to emit output
typedef struct config {
    // bytes_per_line determines the `width` of output lines
    size_t bytes_per_line;

    // emit_file_info is chosen to emit file-info with colors or plainly
    void (*emit_file_info)(bufwriter* w, const char* path, size_t nbytes);

    // emit_line is chosen to emit hex bytes with colors or plainly
    void (*emit_line)(bufwriter* w, size_t i, size_t end, output_state* os);
} config;

// has_line_feeds checks if any of the bytes given is a line-feed
bool has_line_feeds(const unsigned char* buf, size_t len) {
    return memchr(buf, '\n', len) != NULL;
}

// handle_reader shows all bytes read from the source given as colored hex
// values, showing offsets and ASCII symbols on the sides of each output line;
// it returns when the input is over, or when the output is no longer accepted
void handle_reader(bufwriter* w, FILE* src, config cfg) {
    // a zero line-width would divide by zero and never advance the loops
    if (cfg.bytes_per_line < 1) {
        cfg.bytes_per_line = DEFAULT_BYTES_PER_LINE;
    }
    // limit line-width to the buffer's capacity
    if (cfg.bytes_per_line > IO_BUFFER_SIZE) {
        cfg.bytes_per_line = IO_BUFFER_SIZE;
    }

    const size_t one_line = cfg.bytes_per_line;
    const size_t two_lines = 2 * one_line;
    // ensure the effective buffer-size is a multiple of the line-width
    const size_t max = IO_BUFFER_SIZE - IO_BUFFER_SIZE % one_line;

    unsigned char buf[IO_BUFFER_SIZE];
    // peek_ascii never makes more text than the bytes it scans, and it never
    // scans beyond the input buffer, so the same capacity is always enough
    unsigned char txt[IO_BUFFER_SIZE];

    output_state os = {
        .buf = buf,
        .buflen = 0,
        .txt = txt,
        .txtlen = 0,
        .offset = 0,
        .linewidth = one_line,
        .lines = 0,
        .showtxt = false,
    };

    while (!w->done) {
        os.buflen = fread(buf, sizeof(buf[0]), max, src);
        if (os.buflen < 1) {
            // assume input is over when no bytes were read
            flush(w);
            return;
        }

        for (size_t i = 0; i < os.buflen; i += one_line) {
            const size_t left = os.buflen - i;

            // remember all ASCII symbols in current pair of output lines
            peek_ascii(i, i + (left < two_lines ? left : two_lines), &os);

            // show current output line
            cfg.emit_line(w, i, i + (left < one_line ? left : one_line), &os);
        }

        // chunks with line-feeds in them get their output shown right away
        if (has_line_feeds(buf, os.buflen)) {
            flush(w);
        }
    }
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(bufwriter* w, const char* path, config cfg) {
    // a `-` filename stands for the standard input
    if (path[0] == '-' && path[1] == 0) {
        EMIT_CONST(w, "• <stdin>\n");
        EMIT_CONST(w, "\n");
        handle_reader(w, stdin, cfg);
        return true;
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        // ensure currently-buffered/deferred output shows up right now: not
        // doing so may scramble results in the common case where stdout and
        // stderr are the same, thus confusing users
        flush(w);

        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    // get the file size, showing 0 when it can't be determined, instead of
    // using values which were never set
    size_t nbytes = 0;
    struct stat st;
    if (fstat(fileno(f), &st) == 0 && st.st_size > 0) {
        nbytes = (size_t)st.st_size;
    }

    // show output
    cfg.emit_file_info(w, path, nbytes);
    EMIT_CONST(w, "\n");
    handle_reader(w, f, cfg);

    fclose(f);
    return true;
}

// is_help_option simplifies control-flow for func run
bool is_help_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
    );
}

// is_plain_option simplifies control-flow for func run
bool is_plain_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-p") == 0 || strcmp(s, "-plain") == 0 ||
        strcmp(s, "--p") == 0 || strcmp(s, "--plain") == 0
    );
}

// run returns the number of errors, which is how many of the files given
// couldn't be opened
size_t run(int argc, char** argv) {
    config cfg = {
        .bytes_per_line = DEFAULT_BYTES_PER_LINE,
        .emit_file_info = &emit_styled_file_info,
        .emit_line = &emit_styled_line,
    };

    // handle special cmd-line options
    for (int i = 1; i < argc; i++) {
        if (is_help_option(argv[i])) {
            // help option is handled right away, also quitting the app
            fprintf(stderr, "%s", info);
            return 0;
        }

        if (is_plain_option(argv[i])) {
            cfg.emit_line = &emit_plain_line;
            cfg.emit_file_info = &emit_plain_file_info;
        }
    }

    bufwriter w;
    unsigned char buf[IO_BUFFER_SIZE];
    init_bufwriter(&w, stdout, buf, sizeof(buf));

    size_t files = 0;
    size_t errors = 0;

    // handle all filenames given
    for (int i = 1; i < argc && !w.done; i++) {
        if (is_plain_option(argv[i])) {
            // special cmd-line options aren't filenames: since the loop
            // above accepts them anywhere, skip them anywhere too
            continue;
        }

        if (files > 0) {
            // put an empty line between adjacent hex outputs
            write_byte(&w, '\n');
        }

        if (!handle_file(&w, argv[i], cfg)) {
            errors++;
        }
        files++;
    }

    // no filenames means use stdin as the only input
    if (files == 0) {
        EMIT_CONST(&w, "• <stdin>\n");
        EMIT_CONST(&w, "\n");
        handle_reader(&w, stdin, cfg);
    }

    // ensure nothing stays stuck in the buffer, since stdout is unbuffered
    flush(&w);
    return errors;
}

int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // disable automatic stdio buffering, in favor of explicit buffering
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    return run(argc, argv) == 0 ? 0 : 1;
}