File: nh.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./nh ./nh.c
  29 
  30 Building with COMPACT_OUTPUT defined makes `nh` output many fewer bytes, at
  31 the cost of using arguably worse colors. You can do that by running
  32 
  33 cc -Wall -s -O2 -D COMPACT_OUTPUT -o ./nh ./nh.c
  34 */
  35 
  36 #include <fcntl.h>
  37 #include <math.h>
  38 #include <stdbool.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/stat.h>
  43 
  44 #ifdef _WIN32
  45 #include <windows.h>
  46 #endif
  47 
  48 // #define COMPACT_OUTPUT
  49 
  50 // info is the multi-line help message
  51 const char* info = ""
  52 "nh [options...] [filenames...]\n"
  53 "\n"
  54 "Nice Hexadecimal is a simple hexadecimal (base-16) viewer to inspect bytes\n"
  55 "from files or standard input.\n"
  56 "\n"
  57 "Each line shows the starting offset for the bytes shown, 16 of the bytes\n"
  58 "themselves in base-16 notation, and any ASCII codes when the byte values\n"
  59 "are in the typical ASCII range.\n"
  60 "\n"
  61 "The base-16 codes are color-coded, with most bytes shown in gray, while\n"
  62 "all-1 and all-0 bytes are shown in orange and blue respectively.\n"
  63 "\n"
  64 "All-0 bytes are the commonest kind in most binary file types and, along\n"
  65 "with all-1 bytes are also a special case worth noticing when exploring\n"
  66 "binary data, so it makes sense for them to stand out right away.\n"
  67 "\n"
  68 "\n"
  69 "Options\n"
  70 "\n"
  71 "    -h, --h            show this help message\n"
  72 "    -help, --help      aliases for option -h\n"
  73 "\n"
  74 "    -p, --p            plain-text output, without ANSI styles\n"
  75 "    -plain, --plain    aliases for option -p\n"
  76 "\n"
  77 "    -ho, --ho          show hex (base-16) offsets, instead of base-10 ones\n"
  78 "";
  79 
// ANSI-styled string constants used by func write_styled_hex: OUTPUT_FOR_00
// and OUTPUT_FOR_FF are complete outputs (style, hex digits, trailing space)
// for bytes 0x00 and 0xff, while the others are only the style-changing
// prefixes emitted before the hex digits or before the ASCII symbol
#ifdef COMPACT_OUTPUT
// shorter escape sequences, chosen when building with COMPACT_OUTPUT defined
#define OUTPUT_FOR_00 "\x1b[34m00 "
#define OUTPUT_FOR_FF "\x1b[33mff "
#define NORMAL_HEX_STYLE "\x1b[37m"
#define ASCII_HEX_STYLE "\x1b[32m"
#define ASCII_BYTE_STYLE "\x1b[30m"
#else
// longer `38;5;n` escape sequences, which are the default
#define OUTPUT_FOR_00 "\x1b[38;5;111m00 "
#define OUTPUT_FOR_FF "\x1b[38;5;209mff "
#define NORMAL_HEX_STYLE "\x1b[38;5;246m"
#define ASCII_HEX_STYLE "\x1b[38;5;72m"
#define ASCII_BYTE_STYLE "\x1b[38;5;239m"
#endif
  93 
  94 // EMIT_CONST abstracts emitting string constants without their final null byte
  95 #define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)
  96 
  97 inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
  98     fwrite(src, len, 1, w);
  99 }
 100 
 101 // write_hex is faster than calling fprintf(w, "%02x", b): this matters
 102 // because it's called for every input byte
 103 void write_hex(FILE* w, unsigned char b) {
 104     const char* hex_digits = "0123456789abcdef";
 105     putc(hex_digits[b >> 4], w);
 106     putc(hex_digits[b & 0x0f], w);
 107 }
 108 
 109 // write_styled_hex emits an ANSI color-coded hexadecimal representation
 110 // of the byte given
 111 void write_styled_hex(FILE* w, unsigned char b) {
 112     // all-bits-off is almost always noteworthy
 113     if (b == 0) {
 114         EMIT_CONST(w, OUTPUT_FOR_00);
 115         return;
 116     }
 117     // all-bits-on is often noteworthy
 118     if (b == 0xff) {
 119         EMIT_CONST(w, OUTPUT_FOR_FF);
 120         return;
 121     }
 122 
 123     // regular ASCII display symbols
 124     if (32 <= b && b <= 126) {
 125         EMIT_CONST(w, ASCII_HEX_STYLE);
 126         write_hex(w, b);
 127         EMIT_CONST(w, ASCII_BYTE_STYLE);
 128         putc(b, w);
 129         return;
 130     }
 131 
 132     // ASCII control values, and other bytes beyond displayable ASCII
 133     EMIT_CONST(w, NORMAL_HEX_STYLE);
 134     write_hex(w, b);
 135     putc(' ', w);
 136 }
 137 
 138 // ruler emits a ruler-like string of spaced-out symbols
 139 void ruler(FILE* w, size_t bytes_per_line) {
 140     const size_t gap = 4;
 141     if (bytes_per_line < gap) {
 142         return;
 143     }
 144 
 145     EMIT_CONST(w, "             ·");
 146     for (size_t n = bytes_per_line - gap; n >= gap; n -= gap) {
 147         EMIT_CONST(w, "           ·");
 148     }
 149 }
 150 
 151 // write_commas_uint shows a number by separating 3-digits groups with commas
 152 void write_commas_uint(FILE* w, size_t n) {
 153     if (n == 0) {
 154         EMIT_CONST(w, "0");
 155         return;
 156     }
 157 
 158     size_t digits;
 159     // 20 is the most digits unsigned 64-bit ints can ever need
 160     unsigned char buf[24];
 161     for (digits = 0; n > 0; digits++, n /= 10) {
 162         buf[sizeof(buf) - 1 - digits] = (n % 10) + '0';
 163     }
 164 
 165     // now emit the leading digits, which may not come in 3
 166     size_t leading = digits % 3;
 167     if (leading == 0) {
 168         // avoid having a comma before the first digit
 169         leading = digits < 3 ? digits : 3;
 170     }
 171     unsigned char* start = buf + sizeof(buf) - digits;
 172     write_bytes(w, start, leading);
 173     start += leading;
 174     digits -= leading;
 175 
 176     // now emit all remaining digits in groups of 3, alternating styles
 177     for (; digits > 0; start += 3, digits -= 3) {
 178         putc(',', w);
 179         write_bytes(w, start, 3);
 180     }
 181 }
 182 
// output_state ties all values representing the current state shared across
// all functions involved in interpreting the input-buffer and showing its
// bytes and ASCII values; func handle_reader sets it up, and passes it to
// func peek_ascii and to the line-emitting funcs
typedef struct output_state {
    // the whole input-buffer and its currently-used length in bytes, the
    // latter being the result of the latest read
    unsigned char* buf;
    size_t buflen;

    // the ASCII-text buffer and its currently-used length in bytes: both
    // are (re)filled by func peek_ascii before each line is emitted
    unsigned char* txt;
    size_t txtlen;

    // offset is the byte counter, shown at the start of each line: it's
    // advanced by 1 for each byte emitted
    size_t offset;

    // linewidth is how many bytes each line can show at most
    size_t linewidth;

    // lines is the line counter, which is used to provide periodic
    // breather lines, to make eye-scanning big output blobs easier
    size_t lines;

    // emit_offset is chosen to emit the offset at the start of each line
    void (*emit_offset)(FILE* w, size_t offset);

    // showtxt is a hint on whether it's sensible to show the ASCII-text
    // buffer for the current line: it's set by func peek_ascii
    bool showtxt;
} output_state;
 212 
 213 // peek_ascii looks 2 lines ahead in the buffer to get all ASCII-like runs
 214 // of bytes, which are later meant to show on the side panel
 215 void peek_ascii(size_t i, size_t end, output_state* os) {
 216     unsigned char prev = 0;
 217     os->txtlen = 0;
 218 
 219     for (size_t j = i; j < end; j++) {
 220         const unsigned char b = os->buf[j];
 221 
 222         if (' ' < b && b <= '~') {
 223             bool first = os->txtlen == 0;
 224             if (first) {
 225                 // show ASCII panel, if the symbols start on the current line
 226                 os->showtxt = j - i < os->linewidth;
 227             }
 228 
 229             // add a space before the symbol, when it's the start of a `word`
 230             if ((prev <= ' ' || prev > '~') && !first) {
 231                 os->txt[os->txtlen] = ' ';
 232                 os->txtlen++;
 233             }
 234 
 235             // add the symbol itself
 236             os->txt[os->txtlen] = b;
 237             os->txtlen++;
 238         }
 239 
 240         prev = b;
 241     }
 242 }
 243 
 244 // write_plain_uint is the unstyled counterpart of func write_styled_uint
 245 void write_plain_uint(FILE* w, size_t n) {
 246     if (n < 1) {
 247         EMIT_CONST(w, "       0");
 248         return;
 249     }
 250 
 251     size_t digits;
 252     // 20 is the most digits unsigned 64-bit ints can ever need
 253     unsigned char buf[24];
 254     for (digits = 0; n > 0; digits++, n /= 10) {
 255         buf[sizeof(buf) - 1 - digits] = (n % 10) + '0';
 256     }
 257 
 258     // left-pad the coming digits up to 8 chars
 259     if (digits < 8) {
 260         write_bytes(w, (unsigned char*)"        ", 8 - digits);
 261     }
 262 
 263     // emit all digits
 264     unsigned char* start = buf + sizeof(buf) - digits;
 265     write_bytes(w, start, digits);
 266 }
 267 
 268 void write_hex_uint(FILE* w, size_t n) {
 269     if (n < 1) {
 270         EMIT_CONST(w, "00000000");
 271         return;
 272     }
 273 
 274     size_t digits;
 275     // 20 is the most digits unsigned 64-bit ints can ever need
 276     unsigned char buf[24];
 277     for (digits = 0; n > 0; digits += 2, n /= 256) {
 278         unsigned char b = n % 256;
 279         const char* hex_digits = "0123456789abcdef";
 280         buf[sizeof(buf) - 1 - digits - 1] = hex_digits[b >> 4];
 281         buf[sizeof(buf) - 1 - digits - 0] = hex_digits[b & 0x0f];
 282     }
 283 
 284     // left-pad the coming digits up to 8 chars
 285     if (digits < 8) {
 286         write_bytes(w, (unsigned char*)"00000000", 8 - digits);
 287     }
 288 
 289     // emit all digits
 290     unsigned char* start = buf + sizeof(buf) - digits;
 291     write_bytes(w, start, digits);
 292 }
 293 
 294 // write_styled_uint is a quick way to emit the offset-counter showing at the
 295 // start of each line; it assumes 8-item left-padding of values, unless the
 296 // numbers are too big for that
 297 void write_styled_uint(FILE* w, size_t n) {
 298     if (n < 1) {
 299         EMIT_CONST(w, "       0");
 300         return;
 301     }
 302 
 303     size_t digits;
 304     // 20 is the most digits unsigned 64-bit ints can ever need
 305     unsigned char buf[24];
 306     for (digits = 0; n > 0; digits++, n /= 10) {
 307         buf[sizeof(buf) - 1 - digits] = (n % 10) + '0';
 308     }
 309 
 310     // left-pad the coming digits up to 8 chars
 311     if (digits < 8) {
 312         write_bytes(w, (unsigned char*)"        ", 8 - digits);
 313     }
 314 
 315     // now emit the leading digits, which may be fewer than 3
 316     size_t leading = digits % 3;
 317     unsigned char* start = buf + sizeof(buf) - digits;
 318     write_bytes(w, start, leading);
 319     start += leading;
 320     digits -= leading;
 321 
 322     // now emit all remaining digits in groups of 3, alternating styles
 323     bool styled = leading != 0;
 324     for (; digits > 0; start += 3, digits -= 3, styled = !styled) {
 325         if (styled) {
 326             EMIT_CONST(w, "\x1b[38;5;248m");
 327             write_bytes(w, start, 3);
 328             EMIT_CONST(w, "\x1b[0m");
 329         } else {
 330             write_bytes(w, start, 3);
 331         }
 332     }
 333 }
 334 
 335 // emit_styled_file_info emits an ANSI-styled line showing a filename and the
 336 // file's size in bytes
 337 void emit_styled_file_info(FILE* w, const char* path, size_t nbytes) {
 338     EMIT_CONST(w, "");
 339     write_bytes(w, (unsigned char*)path, strlen(path));
 340     EMIT_CONST(w, "  \x1b[38;5;245m(");
 341     write_commas_uint(w, nbytes);
 342     EMIT_CONST(w, " bytes)\x1b[0m\n");
 343 }
 344 
 345 // emit_plain_file_info is the unstyled counterpart of func emit_styled_file_info
 346 void emit_plain_file_info(FILE* w, const char* path, size_t nbytes) {
 347     EMIT_CONST(w, "");
 348     write_bytes(w, (unsigned char*)path, strlen(path));
 349     EMIT_CONST(w, "  (");
 350     write_commas_uint(w, nbytes);
 351     EMIT_CONST(w, " bytes)\n");
 352 }
 353 
 354 // emit_styled_line handles the details of showing a styled line out of the current
 355 // input-buffer chunk
 356 void emit_styled_line(FILE* w, size_t i, size_t end, output_state* os) {
 357     for (size_t j = i; j < end; j++, os->offset++) {
 358         const unsigned char b = os->buf[j];
 359 
 360         if (j % os->linewidth == 0) {
 361             // show a ruler every few lines to make eye-scanning easier
 362             if (os->lines % 5 == 0 && os->lines > 0) {
 363                 EMIT_CONST(w, "        \x1b[38;5;245m");
 364                 ruler(w, os->linewidth);
 365                 EMIT_CONST(w, "\x1b[0m\n");
 366             }
 367             os->lines++;
 368 
 369             // start next line with offset of its 1st item, also
 370             // changing the background color for the colored hex
 371             // code which will follow
 372             // fprintf(stdout, "%8d", os->offset);
 373             // write_styled_uint(w, os->offset);
 374             os->emit_offset(w, os->offset);
 375             EMIT_CONST(w, "  \x1b[48;5;254m");
 376         }
 377 
 378         // show the current byte `with style`
 379         write_styled_hex(w, b);
 380     }
 381 
 382     if (os->showtxt) {
 383         EMIT_CONST(w, "\x1b[0m  ");
 384         for (size_t j = end - i; j < os->linewidth; j++) {
 385             EMIT_CONST(w, "   ");
 386         }
 387 
 388         write_bytes(w, os->txt, os->txtlen);
 389         putc('\n', w);
 390         return;
 391     }
 392     EMIT_CONST(w, "\x1b[0m\n");
 393 }
 394 
 395 // emit_plain_line handles the details of showing a plain (unstyled) line out
 396 // of the current input-buffer chunk
 397 void emit_plain_line(FILE* w, size_t i, size_t end, output_state* os) {
 398     for (size_t j = i; j < end; j++, os->offset++) {
 399         const unsigned char b = os->buf[j];
 400 
 401         if (j % os->linewidth == 0) {
 402             // show a ruler every few lines to make eye-scanning easier
 403             if (os->lines % 5 == 0 && os->lines > 0) {
 404                 // EMIT_CONST(w, "        ");
 405                 // ruler(w, os->linewidth);
 406                 putc('\n', w);
 407             }
 408             os->lines++;
 409 
 410             // start next line with offset of its 1st item, also
 411             // changing the background color for the colored hex
 412             // code which will follow
 413             // fprintf(stdout, "%8d", os->offset);
 414             write_plain_uint(w, os->offset);
 415             EMIT_CONST(w, "  ");
 416         }
 417 
 418         // show the current byte `with style`
 419         write_hex(w, b);
 420         putc(' ', w);
 421     }
 422 
 423     if (os->showtxt) {
 424         EMIT_CONST(w, "  ");
 425         for (size_t j = end - i; j < os->linewidth; j++) {
 426             EMIT_CONST(w, "   ");
 427         }
 428 
 429         write_bytes(w, os->txt, os->txtlen);
 430         putc('\n', w);
 431         return;
 432     }
 433     putc('\n', w);
 434 }
 435 
// config has all the settings used to emit output: func run fills it from
// the command-line options, and passes it by value to the funcs handling
// the inputs
typedef struct config {
    // bytes_per_line determines the `width` of output lines; func
    // handle_reader limits it to its input-buffer's capacity
    size_t bytes_per_line;

    // emit_file_info is chosen to emit file-info with colors or plainly
    void (*emit_file_info)(FILE* w, const char* path, size_t nbytes);

    // emit_line is chosen to emit hex bytes with colors or plainly
    void (*emit_line)(FILE* w, size_t i, size_t end, output_state* os);

    // emit_offset is chosen to emit the offset at the start of each line;
    // func handle_reader copies it into the output_state it sets up
    void (*emit_offset)(FILE* w, size_t offset);
} config;
 450 
 451 bool has_line_feeds(const unsigned char* buf, size_t len) {
 452     for (size_t i = 0; i < len; i++) {
 453         if (buf[i] == 10) {
 454             return true;
 455         }
 456     }
 457     return false;
 458 }
 459 
 460 // handle_reader shows all bytes read from the source given as colored hex
 461 // values, showing offsets and ASCII symbols on the sides of each output line
 462 void handle_reader(FILE* w, FILE* src, config cfg) {
 463     const size_t bufcap = 32 * 1024;
 464     // limit line-width to the buffer's capacity
 465     if (cfg.bytes_per_line > bufcap) {
 466         cfg.bytes_per_line = bufcap;
 467     }
 468 
 469     const size_t two_lines = 2 * cfg.bytes_per_line;
 470     unsigned char txt[two_lines];
 471 
 472     unsigned char buf[bufcap];
 473     // ensure the effective buffer-size is a multiple of the line-width
 474     size_t max = bufcap - bufcap % cfg.bytes_per_line;
 475 
 476     output_state os;
 477     os.buf = buf;
 478     os.linewidth = cfg.bytes_per_line;
 479     os.lines = 0;
 480     os.offset = 0;
 481     os.txt = txt;
 482     os.emit_offset = cfg.emit_offset;
 483 
 484     const size_t one_line = cfg.bytes_per_line;
 485 
 486     while (!feof(w)) {
 487         os.buflen = fread(&buf, sizeof(buf[0]), max, src);
 488         if (os.buflen < 1) {
 489             // assume input is over when no bytes were read
 490             fflush(w);
 491             return;
 492         }
 493 
 494         for (size_t i = 0; i < os.buflen; i += one_line) {
 495             size_t end;
 496 
 497             // remember all ASCII symbols in current pair of output lines
 498             end = i + two_lines < os.buflen ? i + two_lines : os.buflen;
 499             peek_ascii(i, end, &os);
 500 
 501             // show current output line
 502             end = i + one_line < os.buflen ? i + one_line : os.buflen;
 503             cfg.emit_line(w, i, end, &os);
 504         }
 505 
 506         if (has_line_feeds(buf, os.buflen)) {
 507             fflush(w);
 508         }
 509     }
 510 }
 511 
 512 // handle_file handles data from the filename given; returns false only when
 513 // the file can't be opened
 514 bool handle_file(FILE* w, const char* path, config cfg) {
 515     FILE* f = fopen(path, "rb");
 516     if (f == NULL) {
 517         // ensure currently-buffered/deferred output shows up right now: not
 518         // doing so may scramble results in the common case where stdout and
 519         // stderr are the same, thus confusing users
 520         fflush(w);
 521 
 522         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 523         return false;
 524     }
 525 
 526     // get the file size
 527     struct stat st;
 528     fstat(fileno(f), &st);
 529 
 530     // show output
 531     cfg.emit_file_info(w, path, st.st_size);
 532     EMIT_CONST(w, "\n");
 533     handle_reader(w, f, cfg);
 534 
 535     fclose(f);
 536     return true;
 537 }
 538 
 539 // is_help_option simplifies control-flow for func run
 540 bool is_help_option(char* s) {
 541     return (s[0] == '-') && (
 542         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 543         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 544     );
 545 }
 546 
 547 // is_plain_option simplifies control-flow for func run
 548 bool is_plain_option(char* s) {
 549     return (s[0] == '-') && (
 550         strcmp(s, "-p") == 0 || strcmp(s, "-plain") == 0 ||
 551         strcmp(s, "--p") == 0 || strcmp(s, "--plain") == 0
 552     );
 553 }
 554 
 555 // is_hex_offsets simplifies control-flow for func run
 556 bool is_hex_offsets_option(char* s) {
 557     return (s[0] == '-') && (
 558         strcmp(s, "-ho") == 0 || strcmp(s, "--ho") == 0 ||
 559         strcmp(s, "-hexoffsets") == 0 || strcmp(s, "--hexoffsets") == 0 ||
 560         strcmp(s, "-hex-offsets") == 0 || strcmp(s, "--hex-offsets") == 0
 561     );
 562 }
 563 
 564 // run returns the number of errors
 565 int run(int argc, char** argv, FILE* w) {
 566     config cfg;
 567     cfg.bytes_per_line = 16;
 568     cfg.emit_line = &emit_styled_line;
 569     cfg.emit_file_info = &emit_styled_file_info;
 570     cfg.emit_offset = &write_styled_uint;
 571 
 572     size_t files = 0;
 573     size_t errors = 0;
 574 
 575     // handle all filenames/options given
 576     for (size_t i = 1; i < argc && !feof(w); i++) {
 577         // a `-` filename stands for the standard input
 578         if (argv[i][0] == '-' && argv[i][1] == 0) {
 579             EMIT_CONST(w, "• <stdin>\n");
 580             EMIT_CONST(w, "\n");
 581             handle_reader(w, stdin, cfg);
 582             continue;
 583         }
 584 
 585         if (is_help_option(argv[i])) {
 586             // help option quits the app right away
 587             fprintf(stderr, "%s", info);
 588             return 0;
 589         }
 590 
 591         if (is_plain_option(argv[i])) {
 592             cfg.emit_line = &emit_plain_line;
 593             cfg.emit_file_info = &emit_plain_file_info;
 594             continue;
 595         }
 596 
 597         if (is_hex_offsets_option(argv[i])) {
 598             cfg.emit_offset = &write_hex_uint;
 599             continue;
 600         }
 601 
 602         if (files > 0) {
 603             // put an empty line between adjacent hex outputs
 604             putc('\n', w);
 605         }
 606 
 607         if (!handle_file(w, argv[i], cfg)) {
 608             errors++;
 609         }
 610         files++;
 611     }
 612 
 613     // no filenames means use stdin as the only input
 614     if (files == 0) {
 615         EMIT_CONST(w, "• <stdin>\n");
 616         EMIT_CONST(w, "\n");
 617         handle_reader(w, stdin, cfg);
 618     }
 619 
 620     return errors;
 621 }
 622 
 623 int main(int argc, char** argv) {
 624 #ifdef _WIN32
 625     setmode(fileno(stdin), O_BINARY);
 626     // ensure output lines end in LF instead of CRLF on windows
 627     setmode(fileno(stdout), O_BINARY);
 628     setmode(fileno(stderr), O_BINARY);
 629 #endif
 630 
 631     return run(argc, argv, stdout) == 0 ? 0 : 1;
 632 }