File: nn.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27     cc -Wall -s -O2 -o ./nn ./nn.c
  28 */
  29 
  30 #include <fcntl.h>
  31 #include <stdbool.h>
  32 #include <stddef.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 
  37 // info is the message shown when this app is given any of its help options
  38 const char* info =
  39     ""
  40     "nn [options...] [filepaths...]\n"
  41     "\n"
  42     "\n"
  43     "Nice Numbers is an app which renders the plain text it's given to make long\n"
  44     "numbers much easier to read, by alternating 3-digit groups which are colored\n"
  45     "using ANSI-codes with unstyled ones.\n"
  46     "\n"
  47     "Unlike the common practice of inserting commas between 3-digit groups, this\n"
  48     "alternative doesn't widen the original text, keeping any alignments the same.\n"
  49     "\n"
  50     "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
  51     "from the standard input.\n"
  52     "\n"
  53     "\n"
  54     "Options, all of which can start with either 1 or 2 dashes:\n"
  55     "\n"
  56     "\n"
  57     "  -blue     use a blue-like color to alternate-style runs of digits\n"
  58     "  -bold     use a bold style/effect to alternate-style runs of digits\n"
  59     "  -gray     use a gray color to alternate-style runs of digits\n"
  60     "  -green    use a green color to alternate-style runs of digits\n"
  61     "  -inverse  invert/swap colors to alternate-style runs of digits\n"
  62     "  -orange   use an orange color to alternate-style runs of digits\n"
  63     "  -purple   use a purple color to alternate-style runs of digits\n"
  64     "  -red      use a red color to alternate-style runs of digits\n"
  65     "\n"
  66     "  -h          show this help message\n"
  67     "  -help       show this help message\n"
  68     "\n"
  69     "  -highlight  same as option -inverse\n"
  70     "  -hilite     same as option -inverse\n"
  71     "";
  72 
  73 // slice is a growable region of bytes in memory
  74 typedef struct slice {
  75     // ptr is the starting place of the region
  76     unsigned char* ptr;
  77 
  78     // len is how many bytes are currently being used
  79     size_t len;
  80 
  81     // cap is how many bytes the memory region has available
  82     size_t cap;
  83 } slice;
  84 
  85 // new_slice is the constructor for type slice
  86 slice new_slice(size_t cap) {
  87     slice res;
  88     res.cap = cap;
  89     res.len = 0;
  90     res.ptr = malloc(res.cap);
  91     return res;
  92 }
  93 
  94 // advance updates a slice so it starts after the number of bytes given
  95 inline void advance(slice* src, size_t n) {
  96     src->ptr += n;
  97     src->len -= n;
  98 }
  99 
 100 // first creates a slice ending at the number of bytes given
 101 slice first(slice src, size_t n) {
 102     src.len = n;
 103     return src;
 104 }
 105 
 106 // append_byte does as it says, potentially reallocating the memory area
 107 // backing the slice given
 108 void append_byte(slice* s, unsigned char b) {
 109     if (s->len < s->cap) {
 110         // under capacity, so it's ok to append directly
 111         s->ptr[s->len] = b;
 112         s->len++;
 113         return;
 114     }
 115 
 116     // slice is full, so double it and reallocate
 117     s->cap *= 2;
 118     s->ptr = realloc(s->ptr, s->cap);
 119 
 120     // now append directly to the larger array
 121     s->ptr[s->len] = b;
 122     s->len++;
 123 }
 124 
 125 // find_lf returns the index of the first line-feed found, or a negative value
 126 // on failure
 127 long long int find_lf(slice s) {
 128     for (size_t i = 0; i < s.len; i++) {
 129         if (s.ptr[i] == '\n') {
 130             return i;
 131         }
 132     }
 133     return -1;
 134 }
 135 
 136 // find_digit returns the index of the first digit found, or a negative value
 137 // on failure
 138 long long int find_digit(slice s) {
 139     for (size_t i = 0; i < s.len; i++) {
 140         const unsigned char b = s.ptr[i];
 141         if ('0' <= b && b <= '9') {
 142             return i;
 143         }
 144     }
 145     return -1;
 146 }
 147 
 148 // find_non_digit returns the index of the first non-digit found, or a negative
 149 // value on failure
 150 long long int find_non_digit(slice s) {
 151     for (size_t i = 0; i < s.len; i++) {
 152         const unsigned char b = s.ptr[i];
 153         if (b < '0' || b > '9') {
 154             return i;
 155         }
 156     }
 157     return -1;
 158 }
 159 
// reset_style is the ANSI sequence func restyle_digits emits right after each
// styled 3-digit group, to end the style started right before that group
const unsigned char reset_style[] = "\x1b[0m";
 161 
 162 // bufreader is a way to speed up reading data by reducing the frequency of
 163 // data reads from the a data source, while still allowing reading 1 byte at
 164 // a time
 165 typedef struct bufreader {
 166     // buf is the buffer, (re)filled periodically as needed
 167     unsigned char* buf;
 168 
 169     // len is how many buffer bytes are being used, out of its max capacity
 170     size_t len;
 171 
 172     // cap is the buffer's capacity, or the most bytes it can hold at once
 173     size_t cap;
 174 
 175     // pos is the current position, up to the current buffer length
 176     size_t pos;
 177 
 178     // src is the data source used to fill the buffer
 179     FILE* src;
 180 } bufreader;
 181 
 182 // new_bufreader is the constructor for type bufreader
 183 bufreader new_bufreader(FILE* src, size_t cap) {
 184     bufreader res;
 185     res.cap = cap;
 186     res.len = 0;
 187     res.pos = 0;
 188     res.src = src;
 189     res.buf = malloc(res.cap);
 190     return res;
 191 }
 192 
 193 // close_bufreader deallocates the buffer
 194 void close_bufreader(bufreader* r) {
 195     free(r->buf);
 196     r->buf = NULL;
 197     r->len = 0;
 198 }
 199 
 200 // read_byte does as it says: check its return for the value EOF, before
 201 // using it as the next byte
 202 int read_byte(bufreader* r) {
 203     if (r->pos < r->len) {
 204         // inside current chunk
 205         const unsigned char b = r->buf[r->pos];
 206         r->pos++;
 207         return b;
 208     }
 209 
 210     // need to read the next block
 211     r->pos = 0;
 212     r->len = fread(r->buf, sizeof(unsigned char), r->cap, r->src);
 213     if (r->len > 0) {
 214         const unsigned char b = r->buf[r->pos];
 215         r->pos++;
 216         return b;
 217     }
 218 
 219     // reached the end of data
 220     return EOF;
 221 }
 222 
 223 // bufwriter is, as the name implies, a buffered-writer: when it's aimed at
 224 // stdout, it considerably speeds up this app, as intended
 225 typedef struct bufwriter {
 226     // buf is the buffer proper
 227     unsigned char* buf;
 228 
 229     // len is how many bytes of the buffer are currently being used
 230     size_t len;
 231 
 232     // cap is the capacity of the buffer, or the most bytes it can hold
 233     size_t cap;
 234 
 235     // out is the destination of all that's written into the buffer
 236     FILE* out;
 237 
 238     // done signals when/if no more output is accepted at the destination
 239     bool done;
 240 } bufwriter;
 241 
 242 // new_bufwriter is the constructor for type bufwriter
 243 bufwriter new_bufwriter(FILE* dst, size_t cap) {
 244     bufwriter res;
 245     res.cap = cap;
 246     res.done = false;
 247     res.len = 0;
 248     res.out = dst;
 249     res.buf = malloc(res.cap);
 250     return res;
 251 }
 252 
 253 // flush does as it says: it empties the buffer after ensuring its bytes end
 254 // on their intended destination
 255 void flush(bufwriter* w) {
 256     if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
 257         w->done = true;
 258     }
 259     w->len = 0;
 260 }
 261 
 262 // close_bufwriter ensures all output is shown and deallocates the buffer
 263 void close_bufwriter(bufwriter* w) {
 264     flush(w);
 265     free(w->buf);
 266     w->buf = NULL;
 267 }
 268 
 269 // write_bytes does as it says, minimizing the number of calls to fwrite
 270 void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
 271     if (w->len + len < w->cap) {
 272         // all bytes fit into buffer
 273         memcpy(w->buf + w->len, src, len);
 274         w->len += len;
 275         return;
 276     }
 277 
 278     // ensure current buffer bytes go out, before crossing strides
 279     flush(w);
 280 
 281     // emit all chunks striding beyond/at the buffer's capacity
 282     for (; len >= w->cap; src += w->cap, len -= w->cap) {
 283         if (fwrite(src, w->cap, 1, w->out) < 1) {
 284             w->done = true;
 285             return;
 286         }
 287     }
 288 
 289     // now all, if any, remaining bytes will fit into the buffer
 290     memcpy(w->buf, src, len);
 291     w->len += len;
 292 }
 293 
 294 // write_byte does as it says
 295 void write_byte(bufwriter* w, unsigned char b) {
 296     if (w->len >= w->cap) {
 297         flush(w);
 298     }
 299     w->buf[w->len] = b;
 300     w->len++;
 301 }
 302 
 303 // restyle_digits renders a run of digits as alternating styled/unstyled runs
 304 // of 3 digits, which greatly improves readability, and is the only purpose
 305 // of this app; string is assumed to be all decimal digits
 306 void restyle_digits(bufwriter* w, slice digits, const unsigned char* style) {
 307     if (digits.len < 4) {
 308         // digit sequence is short, so emit it as is
 309         write_bytes(w, digits.ptr, digits.len);
 310         return;
 311     }
 312 
 313     // separate leading 0..2 digits which don't align with the 3-digit groups
 314     size_t lead = digits.len % 3;
 315     // emit leading digits unstyled, if there are any
 316     write_bytes(w, digits.ptr, lead);
 317     // the rest is guaranteed to have a length which is a multiple of 3
 318     advance(&digits, lead);
 319 
 320     size_t style_len = strlen((const char*)style);
 321     // start with the alternate style, unless there were no leading digits
 322     bool style_now = lead != 0;
 323 
 324     while (digits.len > 0) {
 325         if (style_now) {
 326             write_bytes(w, style, style_len);
 327             write_bytes(w, digits.ptr, 3);
 328             write_bytes(w, reset_style, sizeof(reset_style) - 1);
 329         } else {
 330             write_bytes(w, digits.ptr, 3);
 331         }
 332 
 333         advance(&digits, 3);
 334         // alternate between styled and unstyled 3-digit groups
 335         style_now = !style_now;
 336     }
 337 }
 338 
 339 // restyle_line renders the line given, using ANSI-styles to make any long
 340 // numbers in it more legible
 341 void restyle_line(bufwriter* w, slice line, const unsigned char* alt_style) {
 342     while (!w->done && line.len > 0) {
 343         long int i = find_digit(line);
 344         if (i < 0) {
 345             // no (more) digits for sure
 346             write_bytes(w, line.ptr, line.len);
 347             return;
 348         }
 349 
 350         // some ANSI-style sequences use 4-digit numbers, which are long
 351         // enough for this app to mangle
 352         const unsigned char* p = line.ptr;
 353         bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';
 354 
 355         // emit line before current digit-run
 356         write_bytes(w, line.ptr, i);
 357 
 358         advance(&line, i);
 359 
 360         // see where the digit-run ends
 361         long int j = find_non_digit(line);
 362         if (j < 0) {
 363             // the digit-run goes until the end
 364             if (!is_ansi) {
 365                 restyle_digits(w, line, alt_style);
 366             } else {
 367                 write_bytes(w, line.ptr, line.len);
 368             }
 369             return;
 370         }
 371 
 372         // emit styled digit-run... maybe
 373         if (!is_ansi) {
 374             slice s;
 375             s.ptr = line.ptr;
 376             s.len = j;
 377             restyle_digits(w, s, alt_style);
 378         } else {
 379             write_bytes(w, line.ptr, j);
 380         }
 381 
 382         // skip right past the end of the digit-run
 383         advance(&line, j);
 384     }
 385 }
 386 
 387 /*
The info-message string defined near the top of this file was made by running the command
 389 
 390 awk 'BEGIN { print "const char* info = \"\"" }
 391      { printf "\"%s\\n\"\n", $0 }
 392      END { print "\"\";" }' info.txt
 393 */
 394 
// default_digits_style makes it easy to change the built-in default style:
// it's the ANSI sequence func run uses to style alternate digit-groups when
// no style option is given, and has the same bytes option -gray selects
const unsigned char default_digits_style[] = "\x1b[38;5;248m";
 397 
// buffer_size is trying to be a good value for modern CPU cores: it's the
// byte-capacity given to the input buffer, to the output buffer, and to the
// line buffer when it starts, since the latter can grow later
const size_t buffer_size = 32 * 1024;
 400 
 401 // handle_reader loops over input lines, restyling all digit-runs as more
 402 // readable `nice numbers`, fulfilling the app's purpose
 403 void handle_reader(bufwriter* w, FILE* src, const unsigned char* style) {
 404     unsigned char prev = 0;
 405     bufreader r = new_bufreader(src, buffer_size);
 406     slice line = new_slice(buffer_size);
 407 
 408     while (!w->done) {
 409         int v = read_byte(&r);
 410         if (v != EOF) {
 411             // still more bytes to go
 412             unsigned char b = v;
 413             prev = b;
 414 
 415             if (b != '\n') {
 416                 // no end of line yet
 417                 append_byte(&line, b);
 418                 continue;
 419             }
 420 
 421             // end of line
 422             append_byte(&line, b);
 423             restyle_line(w, line, style);
 424             line.len = 0;
 425             continue;
 426         }
 427 
 428         // input is over
 429         break;
 430     }
 431 
 432     // don't forget the last line
 433     restyle_line(w, line, style);
 434 
 435     // ensure last output line ends with a line-feed since, at least on
 436     // msys/windows, `less` hangs when lines with millions of symbols
 437     // don't end with a lf
 438     if (prev != '\n') {
 439         write_byte(w, '\n');
 440     }
 441 
 442     close_bufreader(&r);
 443     free(line.ptr);
 444 }
 445 
 446 // handle_file handles data from the filename given; returns false only when
 447 // the file can't be opened
 448 bool handle_file(bufwriter* w, char* fname, const unsigned char* style) {
 449     FILE* f = fopen(fname, "rb");
 450     if (f == NULL) {
 451         // ensure currently-buffered/deferred output shows up right now: not
 452         // doing so may scramble results in the common case where stdout and
 453         // stderr are the same, thus confusing users
 454         flush(w);
 455 
 456         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
 457         return false;
 458     }
 459 
 460     handle_reader(w, f, style);
 461     fclose(f);
 462     return true;
 463 }
 464 
 465 // run returns the number of errors
 466 size_t run(int argc, char** argv) {
 467     char* style = (char*)default_digits_style;
 468     bufwriter w = new_bufwriter(stdout, buffer_size);
 469 
 470     // handle leading options to change the ANSI-style used
 471     size_t start = 1;
 472     if (argc > 1 && argv[start][0] == '-') {
 473         char* s = argv[start] + (argv[start][1] == '-' ? 2 : 1);
 474         if (strcmp(s, "blue") == 0) {
 475             style = "\x1b[38;5;26m";
 476             start++;
 477         } else if (strcmp(s, "bold") == 0) {
 478             style = "\x1b[1m";
 479             start++;
 480         } else if (strcmp(s, "green") == 0) {
 481             style = "\x1b[38;5;29m";
 482             start++;
 483         } else if (strcmp(s, "gray") == 0) {
 484             style = "\x1b[38;5;248m";
 485             start++;
 486         } else if (strcmp(s, "highlight") == 0) {
 487             style = "\x1b[7m";
 488             start++;
 489         } else if (strcmp(s, "hilite") == 0) {
 490             style = "\x1b[7m";
 491             start++;
 492         } else if (strcmp(s, "inverse") == 0) {
 493             style = "\x1b[7m";
 494             start++;
 495         } else if (strcmp(s, "invert") == 0) {
 496             style = "\x1b[7m";
 497             start++;
 498         } else if (strcmp(s, "orange") == 0) {
 499             style = "\x1b[38;5;166m";
 500             start++;
 501         } else if (strcmp(s, "purple") == 0) {
 502             style = "\x1b[38;5;99m";
 503             start++;
 504         } else if (strcmp(s, "red") == 0) {
 505             style = "\x1b[31m";
 506             start++;
 507         }
 508     }
 509 
 510     const unsigned char* alt_style = (const unsigned char*)style;
 511 
 512     // use stdin when not given any filepaths
 513     if ((size_t)argc <= start) {
 514         handle_reader(&w, stdin, alt_style);
 515         close_bufwriter(&w);
 516         return 0;
 517     }
 518 
 519     size_t errors = 0;
 520     for (size_t i = start; i < (size_t)argc && !w.done; i++) {
 521         if (i > start) {
 522             // put an extra empty line between adjacent outputs
 523             write_byte(&w, '\n');
 524         }
 525 
 526         if (!handle_file(&w, argv[i], alt_style)) {
 527             errors++;
 528         }
 529     }
 530 
 531     close_bufwriter(&w);
 532     return errors;
 533 }
 534 
 535 int main(int argc, char** argv) {
 536 #ifdef _WIN32
 537     setmode(fileno(stdin), O_BINARY);
 538     // ensure output lines end in LF instead of CRLF on windows
 539     setmode(fileno(stdout), O_BINARY);
 540     setmode(fileno(stderr), O_BINARY);
 541 #endif
 542 
 543     // handle any of the help options, if given
 544     if (argc > 1 && argv[1][0] == '-') {
 545         const char* s = argv[1] + (argv[1][1] == '-' ? 2 : 1);
 546         if (strcmp(s, "h") == 0 || strcmp(s, "help") == 0) {
 547             puts(info);
 548             return 0;
 549         }
 550     }
 551 
 552     // disable automatic stdio buffering, in favor of explicit buffering
 553     setvbuf(stdin, NULL, _IONBF, 0);
 554     setvbuf(stdout, NULL, _IONBF, 0);
 555     setvbuf(stderr, NULL, _IONBF, 0);
 556 
 557     return run(argc, argv) == 0 ? 0 : 1;
 558 }