File: nn.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27     cc -Wall -s -O2 -o ./nn ./nn.c
  28 */
  29 
  30 #include <fcntl.h>
  31 #include <stdbool.h>
  32 #include <stddef.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 
  37 #ifdef _WIN32
  38 #include <windows.h>
  39 #endif
  40 
  41 // #define COMPACT_OUTPUT
  42 
// info is the message shown when this app is given any of its help options;
// it's built out of adjacent string literals, one per line of help text
//
// NOTE(review): the options listed here are only some of the style names
// the `styles` table accepts (magenta, underline, and the `...back` variants
// aren't mentioned); presumably that's intended, but confirm
const char* info = ""
"nn [options...] [filepaths...]\n"
"\n"
"\n"
"Nice Numbers is an app which renders the plain text it's given to make long\n"
"numbers much easier to read, by alternating 3-digit groups which are colored\n"
"using ANSI-codes with unstyled ones.\n"
"\n"
"Unlike the common practice of inserting commas between 3-digit groups, this\n"
"alternative doesn't widen the original text, keeping any alignments the same.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
"  -blue     use a blue-like color to alternate-style runs of digits\n"
"  -bold     use a bold style/effect to alternate-style runs of digits\n"
"  -gray     use a gray color to alternate-style runs of digits\n"
"  -green    use a green color to alternate-style runs of digits\n"
"  -inverse  invert/swap colors to alternate-style runs of digits\n"
"  -orange   use an orange color to alternate-style runs of digits\n"
"  -purple   use a purple color to alternate-style runs of digits\n"
"  -red      use a red color to alternate-style runs of digits\n"
"\n"
"  -h          show this help message\n"
"  -help       show this help message\n"
"\n"
"  -highlight  same as option -inverse\n"
"  -hilite     same as option -inverse\n"
"";
  77 
// line_memory_error_msg is the red-styled message shown on stderr when the
// line buffer can't be allocated; it has no printf-style verbs in it
const char* line_memory_error_msg =
    ""
    "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";
  81 
// slice is a growable region of bytes in memory; it's also used as a plain
// view (pointer and length) into part of another slice, in which case its
// capacity isn't meaningful
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;
  93 
  94 // init_slice is the constructor for type slice
  95 void init_slice(slice* s, size_t cap) {
  96     s->ptr = malloc(cap);
  97     s->len = 0;
  98     s->cap = cap;
  99 }
 100 
 101 // advance updates a slice so it starts after the number of bytes given
 102 inline void advance(slice* src, size_t n) {
 103     src->ptr += n;
 104     src->len -= n;
 105 }
 106 
 107 // find_digit returns the index of the first digit found, or a negative value
 108 // on failure
 109 long long int find_digit(slice s) {
 110     for (size_t i = 0; i < s.len; i++) {
 111         const unsigned char b = s.ptr[i];
 112         if ('0' <= b && b <= '9') {
 113             return i;
 114         }
 115     }
 116     return -1;
 117 }
 118 
 119 // find_non_digit returns the index of the first non-digit found, or a negative
 120 // value on failure
 121 long long int find_non_digit(slice s) {
 122     for (size_t i = 0; i < s.len; i++) {
 123         const unsigned char b = s.ptr[i];
 124         if (b < '0' || b > '9') {
 125             return i;
 126         }
 127     }
 128     return -1;
 129 }
 130 
// reset_style is the ANSI-code sequence emitted right after each styled
// group of digits, to end its styling
const unsigned char reset_style[] = "\x1b[0m";
 132 
// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended; it doesn't own
// its buffer, which is handed to it by func init_bufwriter
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination:
    // it's set as soon as a call to fwrite fails
    bool done;
} bufwriter;
 151 
 152 // init_bufwriter is the constructor for type bufwriter
 153 void init_bufwriter(bufwriter* w, FILE* dst, unsigned char* buf, size_t cap) {
 154     w->buf = buf;
 155     w->len = 0;
 156     w->cap = cap;
 157     w->out = dst;
 158     w->done = false;
 159 }
 160 
 161 // flush does as it says: it empties the buffer after ensuring its bytes end
 162 // on their intended destination
 163 void flush(bufwriter* w) {
 164     if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
 165         w->done = true;
 166     }
 167     w->len = 0;
 168 }
 169 
 170 // write_bytes does as it says, minimizing the number of calls to fwrite
 171 void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
 172     if (w->len + len < w->cap) {
 173         // all bytes fit into buffer
 174         memcpy(w->buf + w->len, src, len);
 175         w->len += len;
 176         return;
 177     }
 178 
 179     // ensure current buffer bytes go out, before crossing strides
 180     flush(w);
 181 
 182     // emit all chunks striding beyond/at the buffer's capacity
 183     for (; len >= w->cap; src += w->cap, len -= w->cap) {
 184         if (fwrite(src, w->cap, 1, w->out) < 1) {
 185             w->done = true;
 186             return;
 187         }
 188     }
 189 
 190     // now all, if any, remaining bytes will fit into the buffer
 191     memcpy(w->buf, src, len);
 192     w->len += len;
 193 }
 194 
 195 // write_byte does as it says
 196 void write_byte(bufwriter* w, unsigned char b) {
 197     if (w->len >= w->cap) {
 198         flush(w);
 199     }
 200     w->buf[w->len] = b;
 201     w->len++;
 202 }
 203 
 204 // restyle_digits renders a run of digits as alternating styled/unstyled runs
 205 // of 3 digits, which greatly improves readability, and is the only purpose
 206 // of this app; string is assumed to be all decimal digits
 207 void restyle_digits(bufwriter* w, slice digits, slice style) {
 208     if (digits.len < 4) {
 209         // digit sequence is short, so emit it as is
 210         write_bytes(w, digits.ptr, digits.len);
 211         return;
 212     }
 213 
 214     // separate leading 0..2 digits which don't align with the 3-digit groups
 215     size_t lead = digits.len % 3;
 216     // emit leading digits unstyled, if there are any
 217     write_bytes(w, digits.ptr, lead);
 218     // the rest is guaranteed to have a length which is a multiple of 3
 219     advance(&digits, lead);
 220 
 221     // start with the alternate style, unless there were no leading digits
 222     bool style_now = lead != 0;
 223 
 224     while (digits.len > 0) {
 225         if (style_now) {
 226             write_bytes(w, style.ptr, style.len);
 227             write_bytes(w, digits.ptr, 3);
 228             write_bytes(w, reset_style, sizeof(reset_style) - 1);
 229         } else {
 230             write_bytes(w, digits.ptr, 3);
 231         }
 232 
 233         advance(&digits, 3);
 234         // alternate between styled and unstyled 3-digit groups
 235         style_now = !style_now;
 236     }
 237 }
 238 
 239 // restyle_line renders the line given, using ANSI-styles to make any long
 240 // numbers in it more legible
 241 void restyle_line(bufwriter* w, slice line, slice alt_style) {
 242     while (!w->done && line.len > 0) {
 243         long int i = find_digit(line);
 244         if (i < 0) {
 245             // no (more) digits for sure
 246             write_bytes(w, line.ptr, line.len);
 247             flush(w);
 248             return;
 249         }
 250 
 251         // some ANSI-style sequences use 4-digit numbers, which are long
 252         // enough for this app to mangle
 253         const unsigned char* p = line.ptr;
 254         bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';
 255 
 256         // emit line before current digit-run
 257         write_bytes(w, line.ptr, i);
 258 
 259         advance(&line, i);
 260 
 261         // see where the digit-run ends
 262         long int j = find_non_digit(line);
 263         if (j < 0) {
 264             // the digit-run goes until the end
 265             if (!is_ansi) {
 266                 restyle_digits(w, line, alt_style);
 267             } else {
 268                 write_bytes(w, line.ptr, line.len);
 269             }
 270             flush(w);
 271             return;
 272         }
 273 
 274         // emit styled digit-run... maybe
 275         if (!is_ansi) {
 276             slice s;
 277             s.ptr = line.ptr;
 278             s.len = j;
 279             restyle_digits(w, s, alt_style);
 280         } else {
 281             write_bytes(w, line.ptr, j);
 282         }
 283 
 284         // skip right past the end of the digit-run
 285         advance(&line, j);
 286     }
 287 }
 288 
// default_digits_style makes it easy to change the built-in default style,
// which is the one used when no style option is given
unsigned char default_digits_style[] = "\x1b[38;5;248m";
 291 
// handler_args bundles what the input-handling funcs need to do their job
typedef struct handler_args {
    // w is the buffered-writer all output goes through
    bufwriter* w;
    // line is the reusable buffer input lines are read into
    slice* line;
    // style is the ANSI-code sequence which starts each styled digit group
    slice style;
} handler_args;
 297 
 298 bool bom_start(slice s) {
 299     const unsigned char* p = s.ptr;
 300     return s.len >= 3 && p[0] == 0xef && p[0] == 0xbb && p[0] == 0xbf;
 301 }
 302 
 303 // handle_lines loops over input lines, restyling all digit-runs as more
 304 // readable `nice numbers`, fulfilling the app's purpose
 305 bool handle_lines(handler_args args, FILE* src) {
 306     bufwriter* w = args.w;
 307     slice* line = args.line;
 308     slice trimmed;
 309 
 310     for (size_t i = 0; !w->done; i++) {
 311         int len = getline((char**)&line->ptr, &line->cap, src);
 312         if (len < 0) {
 313             break;
 314         }
 315         if (line->ptr == NULL) {
 316             fprintf(stderr, line_memory_error_msg);
 317             exit(1);
 318         }
 319 
 320         line->len = len;
 321         trimmed.ptr = line->ptr;
 322         trimmed.len = line->len;
 323 
 324         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 325         if (i == 0 && bom_start(trimmed)) {
 326             trimmed.ptr += 3;
 327         }
 328 
 329         const unsigned char* p = trimmed.ptr;
 330         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 331         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 332             trimmed.len -= 2;
 333         } else if (len >= 1 && p[len - 1] == '\n') {
 334             trimmed.len--;
 335         }
 336 
 337         restyle_line(w, trimmed, args.style);
 338         write_byte(w, '\n');
 339         flush(w);
 340     }
 341 
 342     flush(w);
 343     return true;
 344 }
 345 
 346 // handle_file handles data from the filename given; returns false only when
 347 // the file can't be opened
 348 bool handle_file(handler_args args, char* path) {
 349     FILE* f = fopen(path, "rb");
 350     if (f == NULL) {
 351         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 352         return false;
 353     }
 354 
 355     const bool ok = handle_lines(args, f);
 356     fclose(f);
 357     return ok;
 358 }
 359 
// style_names_aliases is a flat list of pairs: each item at an even index is
// an alias, and the item right after it is the style name it stands for;
// func run looks aliases up in order, stopping at the first match
//
// NOTE(review): alias `h` seems unreachable via option `-h`, since func main
// checks for help options before func run sees the arguments
const char *style_names_aliases[] = {
    "b", "blue",
    "g", "green",
    "h", "inverse",
    "i", "inverse",
    "m", "magenta",
    "o", "orange",
    "p", "purple",
    "r", "red",
    "u", "underline",

    "hi", "inverse",
    "ma", "magenta",
    "or", "orange",
    "un", "underline",

    "inv", "inverse",
    "mag", "magenta",

    "grey", "gray",
    "highlight", "inverse",
    "highlighted", "inverse",
    "hilite", "inverse",
    "hilited", "inverse",
    "invert", "inverse",
    "inverted", "inverse",
    "underlined", "underline",

    "bb", "blueback",
    "gb", "greenback",
    "mb", "magentaback",
    "ob", "orangeback",
    "pb", "purpleback",
    "rb", "redback",

    "greyback", "grayback",
};
 397 
// styles is a flat list of pairs: each item at an even index is a style
// name, and the item right after it is the ANSI-code sequence which starts
// that style; when COMPACT_OUTPUT is defined, colors use the shorter
// indexed-color codes (`38;5;...` and `48;5;...`), instead of the RGB ones
// (`38;2;...` and `48;2;...`); both variants have the same style names, in
// the same order
#ifdef COMPACT_OUTPUT
char *styles[] = {
    "blue", "\x1b[38;5;26m",
    "bold", "\x1b[1m",
    "gray", "\x1b[38;5;248m",
    "green", "\x1b[38;5;29m",
    "inverse", "\x1b[7m",
    "magenta", "\x1b[38;5;165m",
    "orange", "\x1b[38;5;166m",
    "purple", "\x1b[38;5;99m",
    "red", "\x1b[38;5;1m",
    "underline", "\x1b[4m",

    // the `...back` styles set a background color, then a foreground one
    "blueback", "\x1b[48;5;26m\x1b[38;5;15m",
    "grayback", "\x1b[48;5;248m\x1b[38;5;15m",
    "greenback", "\x1b[48;5;29m\x1b[38;5;15m",
    "magentaback", "\x1b[48;5;165m\x1b[38;5;15m",
    "orangeback", "\x1b[48;5;166m\x1b[38;5;15m",
    "purpleback", "\x1b[48;5;99m\x1b[38;5;15m",
    "redback", "\x1b[48;5;1m\x1b[38;5;15m",
};
#else
char *styles[] = {
    "blue", "\x1b[38;2;0;95;215m",
    "bold", "\x1b[1m",
    "gray", "\x1b[38;2;168;168;168m",
    "green", "\x1b[38;2;0;135;95m",
    "inverse", "\x1b[7m",
    "magenta", "\x1b[38;2;215;0;255m",
    "orange", "\x1b[38;2;215;95;0m",
    "purple", "\x1b[38;2;135;95;255m",
    "red", "\x1b[38;2;204;0;0m",
    "underline", "\x1b[4m",

    // the `...back` styles set a background color, then a foreground one
    "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
    "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
    "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
    "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
    "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
    "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
    "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
};
#endif
 441 
 442 // run returns the number of errors
 443 size_t run(int argc, char** argv, slice* line) {
 444     unsigned char* style = default_digits_style;
 445 
 446     bufwriter w;
 447     unsigned char obuf[48 * 1024];
 448     init_bufwriter(&w, stdout, obuf, sizeof(obuf));
 449 
 450     // handle leading options to change the ANSI-style used
 451     size_t start = 1;
 452     if (argc > 1 && argv[start][0] == '-') {
 453         const char* s = argv[start] + (argv[start][1] == '-' ? 2 : 1);
 454 
 455         // resolve style-name aliases
 456         const size_t n = sizeof(style_names_aliases) / sizeof(char *);
 457         for (size_t i = 0; i < n; i += 2) {
 458             if (strcmp(s, style_names_aliases[i]) == 0) {
 459                 s = style_names_aliases[i + 1];
 460                 break;
 461             }
 462         }
 463 
 464         // find ANSI-code for the style-name given
 465         size_t found = 0;
 466         for (size_t i = 0; i < sizeof(styles) / sizeof(char *); i += 2) {
 467             if (strcmp(s, styles[i]) == 0) {
 468                 start++;
 469                 found = 1;
 470                 style = (unsigned char*)styles[i + 1];
 471                 break;
 472             }
 473         }
 474 
 475         if (found == 0) {
 476             fprintf(stderr, "\x1b[31munsupported style named %s\x1b[0m\n", s);
 477             return 1;
 478         }
 479     }
 480 
 481     size_t errors = 0;
 482     handler_args args;
 483     args.w = &w;
 484     args.line = line;
 485     args.style.ptr = style;
 486     args.style.len = strlen((char*)style);
 487 
 488     // use stdin when not given any filepaths
 489     if ((size_t)argc <= start) {
 490         if (!handle_lines(args, stdin)) {
 491             errors++;
 492         }
 493         return errors;
 494     }
 495 
 496     for (size_t i = start; i < (size_t)argc && !w.done; i++) {
 497         if (i > start) {
 498             // put an extra empty line between adjacent outputs
 499             write_byte(&w, '\n');
 500         }
 501 
 502         if (argv[i][0] == '-' && argv[i][1] == 0) {
 503             if (!handle_lines(args, stdin)) {
 504                 errors++;
 505             }
 506             continue;
 507         }
 508 
 509         if (!handle_file(args, argv[i])) {
 510             errors++;
 511         }
 512     }
 513 
 514     return errors;
 515 }
 516 
 517 // is_help_option simplifies control-flow for func main
 518 bool is_help_option(char* s) {
 519     return (s[0] == '-') && (
 520         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 521         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 522     );
 523 }
 524 
 525 int main(int argc, char** argv) {
 526 #ifdef _WIN32
 527     setmode(fileno(stdin), O_BINARY);
 528     // ensure output lines end in LF instead of CRLF on windows
 529     setmode(fileno(stdout), O_BINARY);
 530     setmode(fileno(stderr), O_BINARY);
 531 #endif
 532 
 533     // handle any of the help options, if given
 534     if (argc > 1 && is_help_option(argv[1])) {
 535         puts(info);
 536         return 0;
 537     }
 538 
 539     // disable automatic stdio buffering, in favor of explicit buffering
 540     setvbuf(stdin, NULL, _IONBF, 0);
 541     setvbuf(stdout, NULL, _IONBF, 0);
 542     setvbuf(stderr, NULL, _IONBF, 0);
 543 
 544     slice line;
 545     init_slice(&line, 16 * 1024);
 546     if (line.ptr == NULL) {
 547         fprintf(stderr, line_memory_error_msg);
 548         return 1;
 549     }
 550 
 551     const int res = run(argc, argv, &line) == 0 ? 0 : 1;
 552     free(line.ptr);
 553     return res;
 554 }