// File: nh.c
/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running
cc -Wall -s -O2 -o ./nh ./nh.c

Building with COMPACT_OUTPUT defined makes `nh` output many fewer bytes, at
the cost of using arguably worse colors.
*/

#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

// #define COMPACT_OUTPUT

// info is the multi-line help message
const char* info =
    ""
    "nh [options...] [filenames...]\n"
    "\n"
    "Nice Hexadecimal is a simple hexadecimal (base-16) viewer to inspect bytes\n"
    "from files or standard input.\n"
    "\n"
    "Each line shows the starting offset for the bytes shown, 16 of the bytes\n"
    "themselves in base-16 notation, and any ASCII codes when the byte values\n"
    "are in the typical ASCII range.\n"
    "\n"
    "The base-16 codes are color-coded, with most bytes shown in gray, while\n"
    "all-1 and all-0 bytes are shown in orange and blue respectively.\n"
    "\n"
    "All-0 bytes are the commonest kind in most binary file types and, along\n"
    "with all-1 bytes are also a special case worth noticing when exploring\n"
    "binary data, so it makes sense for them to stand out right away.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    " -h, --h show this help message\n"
    " -help, --help aliases for option -h\n"
    "\n"
    " -p, --p plain-text output, without ANSI styles\n"
    " -plain, --plain aliases for option -p\n"
    "";

// ANSI-style snippets used by the styled output: the first 2 are complete
// outputs for bytes 0x00 and 0xff, the others only start a color style
#ifdef COMPACT_OUTPUT
#define OUTPUT_FOR_00 "\x1b[34m00 "
#define OUTPUT_FOR_FF "\x1b[33mff "
#define NORMAL_HEX_STYLE "\x1b[37m"
#define ASCII_HEX_STYLE "\x1b[32m"
#define ASCII_BYTE_STYLE "\x1b[30m"
#else
#define OUTPUT_FOR_00 "\x1b[38;5;111m00 "
#define OUTPUT_FOR_FF "\x1b[38;5;209mff "
#define NORMAL_HEX_STYLE "\x1b[38;5;246m"
#define ASCII_HEX_STYLE "\x1b[38;5;72m"
#define ASCII_BYTE_STYLE "\x1b[38;5;239m"
#endif

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination
    bool done;
} bufwriter;

// new_bufwriter is the constructor for type bufwriter: when the buffer can't
// be allocated (or the capacity asked is 0) the result has a NULL buffer, a
// capacity of 0, and is already marked as done, so all writes to it are
// safely ignored
bufwriter new_bufwriter(FILE* dst, size_t cap) {
    bufwriter res;
    res.buf = cap > 0 ? malloc(cap) : NULL;
    res.len = 0;
    res.cap = cap;
    res.out = dst;
    res.done = false;

    if (res.buf == NULL) {
        // without a buffer nothing can be stored: refuse all output
        res.cap = 0;
        res.done = true;
    }
    return res;
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; a failed write marks the writer as done
void flush(bufwriter* w) {
    if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
        w->done = true;
    }
    w->len = 0;
}

// close_bufwriter ensures all output is shown and deallocates the buffer
void close_bufwriter(bufwriter* w) {
    flush(w);
    free(w->buf);
    w->buf = NULL;
}

// write_bytes does as it says, minimizing the number of calls to fwrite;
// nothing happens when the writer is already done
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    if (w->done || len == 0) {
        return;
    }

    // w->len never exceeds w->cap, so this subtraction can't wrap around,
    // unlike adding the 2 lengths
    if (len < w->cap - w->len) {
        // all bytes fit into buffer
        memcpy(w->buf + w->len, src, len);
        w->len += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);
    if (w->done) {
        return;
    }

    // emit all chunks striding beyond/at the buffer's capacity
    for (; len >= w->cap; src += w->cap, len -= w->cap) {
        if (fwrite(src, w->cap, 1, w->out) < 1) {
            w->done = true;
            return;
        }
    }

    // now all, if any, remaining bytes will fit into the (empty) buffer
    memcpy(w->buf, src, len);
    w->len = len;
}

// write_byte does as it says; nothing happens when the writer is already done
void write_byte(bufwriter* w, unsigned char b) {
    if (w->done) {
        return;
    }

    if (w->len >= w->cap) {
        flush(w);
        if (w->done) {
            return;
        }
    }

    w->buf[w->len] = b;
    w->len++;
}

// EMIT_CONST abstracts a common use-case of the bufwriter, which is
// emitting string constants without their final null byte: x must be a
// string literal (or char array), since its size comes from sizeof
#define EMIT_CONST(w, x) \
    write_bytes((w), (const unsigned char*)(x), sizeof(x) - 1)

// write_hex is faster than calling fprintf(w, "%02x", b): this matters
173 // because it's called for every input byte 174 void write_hex(bufwriter* w, unsigned char b) { 175 const char* hex_digits = "0123456789abcdef"; 176 write_byte(w, hex_digits[b >> 4]); 177 write_byte(w, hex_digits[b & 0x0f]); 178 } 179 180 // write_styled_hex emits an ANSI color-coded hexadecimal representation 181 // of the byte given 182 void write_styled_hex(bufwriter* w, unsigned char b) { 183 // all-bits-off is almost always noteworthy 184 if (b == 0) { 185 EMIT_CONST(w, OUTPUT_FOR_00); 186 return; 187 } 188 // all-bits-on is often noteworthy 189 if (b == 0xff) { 190 EMIT_CONST(w, OUTPUT_FOR_FF); 191 return; 192 } 193 194 // regular ASCII display symbols 195 if (32 <= b && b <= 126) { 196 EMIT_CONST(w, ASCII_HEX_STYLE); 197 write_hex(w, b); 198 EMIT_CONST(w, ASCII_BYTE_STYLE); 199 write_byte(w, b); 200 return; 201 } 202 203 // ASCII control values, and other bytes beyond displayable ASCII 204 EMIT_CONST(w, NORMAL_HEX_STYLE); 205 write_hex(w, b); 206 write_byte(w, ' '); 207 } 208 209 // ruler emits a ruler-like string of spaced-out symbols 210 void ruler(bufwriter* w, size_t bytes_per_line) { 211 const size_t gap = 4; 212 if (bytes_per_line < gap) { 213 return; 214 } 215 216 EMIT_CONST(w, " ·"); 217 for (size_t n = bytes_per_line - gap; n >= gap; n -= gap) { 218 EMIT_CONST(w, " ·"); 219 } 220 } 221 222 // write_commas_uint shows a number by separating 3-digits groups with commas 223 void write_commas_uint(bufwriter* w, size_t n) { 224 if (n == 0) { 225 EMIT_CONST(w, "0"); 226 return; 227 } 228 229 size_t digits; 230 // 20 is the most digits unsigned 64-bit ints can ever need 231 unsigned char buf[24]; 232 for (digits = 0; n > 0; digits++, n /= 10) { 233 buf[sizeof(buf) - 1 - digits] = (n % 10) + '0'; 234 } 235 236 // now emit the leading digits, which may not come in 3 237 size_t leading = digits % 3; 238 if (leading == 0) { 239 // avoid having a comma before the first digit 240 leading = digits < 3 ? 
digits : 3; 241 } 242 unsigned char* start = buf + sizeof(buf) - digits; 243 write_bytes(w, start, leading); 244 start += leading; 245 digits -= leading; 246 247 // now emit all remaining digits in groups of 3, alternating styles 248 for (; digits > 0; start += 3, digits -= 3) { 249 write_byte(w, ','); 250 write_bytes(w, start, 3); 251 } 252 } 253 254 // output_state ties all values representing the current state shared across 255 // all functions involved in interpreting the input-buffer and showing its 256 // bytes and ASCII values 257 typedef struct output_state { 258 // the whole input-buffer and its currently-used length in bytes 259 unsigned char* buf; 260 size_t buflen; 261 262 // the ASCII-text buffer and its currently-used length in bytes 263 unsigned char* txt; 264 size_t txtlen; 265 266 // offset is the byte counter, shown at the start of each line 267 size_t offset; 268 269 // linewidth is how many bytes each line can show at most 270 size_t linewidth; 271 272 // lines is the line counter, which is used to provide periodic 273 // breather lines, to make eye-scanning big output blobs easier 274 size_t lines; 275 276 // showtxt is a hint on whether it's sensible to show the ASCII-text 277 // buffer for the current line 278 bool showtxt; 279 } output_state; 280 281 // peek_ascii looks 2 lines ahead in the buffer to get all ASCII-like runs 282 // of bytes, which are later meant to show on the side panel 283 void peek_ascii(size_t i, size_t end, output_state* os) { 284 unsigned char prev = 0; 285 os->txtlen = 0; 286 287 for (size_t j = i; j < end; j++) { 288 const unsigned char b = os->buf[j]; 289 290 if (' ' < b && b <= '~') { 291 bool first = os->txtlen == 0; 292 if (first) { 293 // show ASCII panel, if the symbols start on the current line 294 os->showtxt = j - i < os->linewidth; 295 } 296 297 // add a space before the symbol, when it's the start of a `word` 298 if ((prev <= ' ' || prev > '~') && !first) { 299 os->txt[os->txtlen] = ' '; 300 os->txtlen++; 
301 } 302 303 // add the symbol itself 304 os->txt[os->txtlen] = b; 305 os->txtlen++; 306 } 307 308 prev = b; 309 } 310 } 311 312 // write_plain_uint is the unstyled counterpart of func write_styled_uint 313 void write_plain_uint(bufwriter* w, size_t n) { 314 if (n < 1) { 315 EMIT_CONST(w, " 0"); 316 return; 317 } 318 319 size_t digits; 320 // 20 is the most digits unsigned 64-bit ints can ever need 321 unsigned char buf[24]; 322 for (digits = 0; n > 0; digits++, n /= 10) { 323 buf[sizeof(buf) - 1 - digits] = (n % 10) + '0'; 324 } 325 326 // left-pad the coming digits up to 8 chars 327 if (digits < 8) { 328 write_bytes(w, (unsigned char*)" ", 8 - digits); 329 } 330 331 // emit all digits 332 unsigned char* start = buf + sizeof(buf) - digits; 333 write_bytes(w, start, digits); 334 } 335 336 // write_styled_uint is a quick way to emit the offset-counter showing at the 337 // start of each line; it assumes 8-item left-padding of values, unless the 338 // numbers are too big for that 339 void write_styled_uint(bufwriter* w, size_t n) { 340 if (n < 1) { 341 EMIT_CONST(w, " 0"); 342 return; 343 } 344 345 size_t digits; 346 // 20 is the most digits unsigned 64-bit ints can ever need 347 unsigned char buf[24]; 348 for (digits = 0; n > 0; digits++, n /= 10) { 349 buf[sizeof(buf) - 1 - digits] = (n % 10) + '0'; 350 } 351 352 // left-pad the coming digits up to 8 chars 353 if (digits < 8) { 354 write_bytes(w, (unsigned char*)" ", 8 - digits); 355 } 356 357 // now emit the leading digits, which may be fewer than 3 358 size_t leading = digits % 3; 359 unsigned char* start = buf + sizeof(buf) - digits; 360 write_bytes(w, start, leading); 361 start += leading; 362 digits -= leading; 363 364 // now emit all remaining digits in groups of 3, alternating styles 365 bool styled = leading != 0; 366 for (; digits > 0; start += 3, digits -= 3, styled = !styled) { 367 if (styled) { 368 EMIT_CONST(w, "\x1b[38;5;243m"); 369 write_bytes(w, start, 3); 370 EMIT_CONST(w, "\x1b[0m"); 371 } else 
{ 372 write_bytes(w, start, 3); 373 } 374 } 375 } 376 377 // emit_styled_file_info emits an ANSI-styled line showing a filename and the 378 // file's size in bytes 379 void emit_styled_file_info(bufwriter* w, const char* path, size_t nbytes) { 380 EMIT_CONST(w, "• "); 381 write_bytes(w, (unsigned char*)path, strlen(path)); 382 EMIT_CONST(w, " \x1b[38;5;245m("); 383 write_commas_uint(w, nbytes); 384 EMIT_CONST(w, " bytes)\x1b[0m\n"); 385 } 386 387 // emit_plain_file_info is the unstyled counterpart of func emit_styled_file_info 388 void emit_plain_file_info(bufwriter* w, const char* path, size_t nbytes) { 389 EMIT_CONST(w, "• "); 390 write_bytes(w, (unsigned char*)path, strlen(path)); 391 EMIT_CONST(w, " ("); 392 write_commas_uint(w, nbytes); 393 EMIT_CONST(w, " bytes)\n"); 394 } 395 396 // emit_styled_line handles the details of showing a styled line out of the current 397 // input-buffer chunk 398 void emit_styled_line(bufwriter* w, size_t i, size_t end, output_state* os) { 399 for (size_t j = i; j < end; j++, os->offset++) { 400 const unsigned char b = os->buf[j]; 401 402 if (j % os->linewidth == 0) { 403 // show a ruler every few lines to make eye-scanning easier 404 if (os->lines % 5 == 0 && os->lines > 0) { 405 EMIT_CONST(w, " \x1b[38;5;245m"); 406 ruler(w, os->linewidth); 407 EMIT_CONST(w, "\x1b[0m\n"); 408 } 409 os->lines++; 410 411 // start next line with offset of its 1st item, also 412 // changing the background color for the colored hex 413 // code which will follow 414 // fprintf(stdout, "%8d", os->offset); 415 write_styled_uint(w, os->offset); 416 EMIT_CONST(w, " \x1b[48;5;254m"); 417 } 418 419 // show the current byte `with style` 420 write_styled_hex(w, b); 421 } 422 423 if (os->showtxt) { 424 EMIT_CONST(w, "\x1b[0m "); 425 for (size_t j = end - i; j < os->linewidth; j++) { 426 EMIT_CONST(w, " "); 427 } 428 429 write_bytes(w, os->txt, os->txtlen); 430 write_byte(w, '\n'); 431 return; 432 } 433 EMIT_CONST(w, "\x1b[0m\n"); 434 } 435 436 // 
emit_plain_line handles the details of showing a plain (unstyled) line out 437 // of the current input-buffer chunk 438 void emit_plain_line(bufwriter* w, size_t i, size_t end, output_state* os) { 439 for (size_t j = i; j < end; j++, os->offset++) { 440 const unsigned char b = os->buf[j]; 441 442 if (j % os->linewidth == 0) { 443 // show a ruler every few lines to make eye-scanning easier 444 if (os->lines % 5 == 0 && os->lines > 0) { 445 // EMIT_CONST(w, " "); 446 // ruler(w, os->linewidth); 447 write_byte(w, '\n'); 448 } 449 os->lines++; 450 451 // start next line with offset of its 1st item, also 452 // changing the background color for the colored hex 453 // code which will follow 454 // fprintf(stdout, "%8d", os->offset); 455 write_plain_uint(w, os->offset); 456 EMIT_CONST(w, " "); 457 } 458 459 // show the current byte `with style` 460 write_hex(w, b); 461 write_byte(w, ' '); 462 } 463 464 if (os->showtxt) { 465 EMIT_CONST(w, " "); 466 for (size_t j = end - i; j < os->linewidth; j++) { 467 EMIT_CONST(w, " "); 468 } 469 470 write_bytes(w, os->txt, os->txtlen); 471 write_byte(w, '\n'); 472 return; 473 } 474 write_byte(w, '\n'); 475 } 476 477 // config has all the settings used to emit output 478 typedef struct config { 479 // bytes_per_line determines the `width` of output lines 480 size_t bytes_per_line; 481 482 // emit_file_info is chosen to emit file-info with colors or plainly 483 void (*emit_file_info)(bufwriter* w, const char* path, size_t nbytes); 484 485 // emit_line is chosen to emit hex bytes with colors or plainly 486 void (*emit_line)(bufwriter* w, size_t i, size_t end, output_state* os); 487 } config; 488 489 // handle_reader shows all bytes read from the source given as colored hex 490 // values, showing offsets and ASCII symbols on the sides of each output line 491 void handle_reader(bufwriter* w, FILE* src, config cfg) { 492 const size_t bufcap = 32 * 1024; 493 // limit line-width to the buffer's capacity 494 if (cfg.bytes_per_line > bufcap) { 
495 cfg.bytes_per_line = bufcap; 496 } 497 498 const size_t two_lines = 2 * cfg.bytes_per_line; 499 unsigned char txt[two_lines]; 500 501 unsigned char buf[bufcap]; 502 // ensure the effective buffer-size is a multiple of the line-width 503 size_t max = bufcap - bufcap % cfg.bytes_per_line; 504 505 output_state os; 506 os.buf = buf; 507 os.linewidth = cfg.bytes_per_line; 508 os.lines = 0; 509 os.offset = 0; 510 os.txt = txt; 511 512 const size_t one_line = cfg.bytes_per_line; 513 514 while (!w->done) { 515 os.buflen = fread(&buf, sizeof(unsigned char), max, src); 516 if (os.buflen < 1) { 517 // assume input is over when no bytes were read 518 return; 519 } 520 521 for (size_t i = 0; i < os.buflen; i += one_line) { 522 size_t end; 523 524 // remember all ASCII symbols in current pair of output lines 525 end = i + two_lines < os.buflen ? i + two_lines : os.buflen; 526 peek_ascii(i, end, &os); 527 528 // show current output line 529 end = i + one_line < os.buflen ? i + one_line : os.buflen; 530 cfg.emit_line(w, i, end, &os); 531 } 532 } 533 } 534 535 // handle_file handles data from the filename given; returns false only when 536 // the file can't be opened 537 bool handle_file(bufwriter* w, const char* path, config cfg) { 538 // a `-` filename stands for the standard input 539 if (strcmp(path, "-") == 0) { 540 EMIT_CONST(w, "• <stdin>\n"); 541 EMIT_CONST(w, "\n"); 542 handle_reader(w, stdin, cfg); 543 return true; 544 } 545 546 FILE* f = fopen(path, "rb"); 547 if (f == NULL) { 548 // ensure currently-buffered/deferred output shows up right now: not 549 // doing so may scramble results in the common case where stdout and 550 // stderr are the same, thus confusing users 551 flush(w); 552 553 fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path); 554 return false; 555 } 556 557 // get the file size 558 struct stat st; 559 fstat(fileno(f), &st); 560 561 // show output 562 cfg.emit_file_info(w, path, st.st_size); 563 EMIT_CONST(w, "\n"); 564 handle_reader(w, 
f, cfg); 565 566 fclose(f); 567 return true; 568 } 569 570 // is_help_option simplifies control-flow for func run 571 bool is_help_option(char* s) { 572 return false || 573 strcmp(s, "-h") == 0 || 574 strcmp(s, "-help") == 0 || 575 strcmp(s, "--h") == 0 || 576 strcmp(s, "--help") == 0; 577 } 578 579 // is_plain_option simplifies control-flow for func run 580 bool is_plain_option(char* s) { 581 return false || 582 strcmp(s, "-p") == 0 || 583 strcmp(s, "-plain") == 0 || 584 strcmp(s, "--p") == 0 || 585 strcmp(s, "--plain") == 0; 586 } 587 588 // run returns the number of errors 589 size_t run(int argc, char** argv) { 590 config cfg; 591 cfg.bytes_per_line = 16; 592 cfg.emit_line = &emit_styled_line; 593 cfg.emit_file_info = &emit_styled_file_info; 594 595 // handle special cmd-line options and count filenames 596 size_t fnames = 0; 597 for (size_t i = 1; i < argc; i++) { 598 if (is_help_option(argv[i])) { 599 // help option is handled right away, also quitting the app 600 fprintf(stderr, "%s", info); 601 return 0; 602 } 603 if (is_plain_option(argv[i])) { 604 cfg.emit_line = &emit_plain_line; 605 cfg.emit_file_info = &emit_plain_file_info; 606 continue; 607 } 608 fnames++; 609 } 610 611 bufwriter w = new_bufwriter(stdout, 32 * 1024); 612 613 // no filenames means use stdin as the only input 614 if (fnames == 0) { 615 EMIT_CONST(&w, "• <stdin>\n"); 616 EMIT_CONST(&w, "\n"); 617 handle_reader(&w, stdin, cfg); 618 close_bufwriter(&w); 619 return 0; 620 } 621 622 size_t errors = 0; 623 bool first_file = true; 624 625 // handle all filenames given 626 for (size_t i = 1; i < argc && !w.done; i++) { 627 if (i == 1 && is_plain_option(argv[i])) { 628 // special cmd-line options aren't filenames 629 continue; 630 } 631 632 if (!first_file) { 633 // put an empty line between adjacent hex outputs 634 write_byte(&w, '\n'); 635 } 636 637 if (!handle_file(&w, argv[i], cfg)) { 638 errors++; 639 } 640 first_file = false; 641 } 642 643 close_bufwriter(&w); 644 return errors; 645 } 646 
// main sets up the 3 standard streams, then hands control over to func run:
// the exit code is 0 when run reports no errors, and 1 otherwise
int main(int argc, char** argv) {
    FILE* const std_streams[] = {stdin, stdout, stderr};
    const size_t nstreams = sizeof(std_streams) / sizeof(std_streams[0]);

#ifdef _WIN32
    // binary mode also ensures output lines end in LF instead of CRLF
    // on windows
    for (size_t k = 0; k < nstreams; k++) {
        setmode(fileno(std_streams[k]), O_BINARY);
    }
#endif

    // disable automatic stdio buffering, in favor of explicit buffering
    for (size_t k = 0; k < nstreams; k++) {
        setvbuf(std_streams[k], NULL, _IONBF, 0);
    }

    const size_t errors = run(argc, argv);
    if (errors != 0) {
        return 1;
    }
    return 0;
}