File: nn.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./nn ./nn.c
  29 
  30 Building with COMPACT_OUTPUT defined makes `nn` output many fewer bytes, at
  31 the cost of using arguably worse colors. You can do that by running
  32 
cc -s -O2 -march=native -mtune=native -flto -D COMPACT_OUTPUT -o ./nn ./nn.c
  34 */
  35 
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
  42 
  43 #ifdef _WIN32
  44 #include <fcntl.h>
  45 #include <windows.h>
  46 #endif
  47 
  48 #ifdef RED_ERRORS
  49 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  50 #ifdef __APPLE__
  51 #define ERROR_STYLE "\x1b[31m"
  52 #endif
  53 #define RESET_STYLE "\x1b[0m"
  54 #else
  55 #define ERROR_STYLE
  56 #define RESET_STYLE
  57 #endif
  58 
  59 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  60 
// BAD_ALLOC is the exit code used when memory can't be allocated
#define BAD_ALLOC 2

// uncommenting the next line has the same effect as building with
// `-D COMPACT_OUTPUT`, which selects the shorter 8-bit ANSI color codes
// #define COMPACT_OUTPUT

// EMIT_CONST emits string constants without their final null byte; `x` must
// be a string literal (or a char array), since its length comes from sizeof,
// and `w` is the FILE* to write to
#define EMIT_CONST(w, x) fwrite(x, 1, sizeof(x) - 1, w)
  67 
  68 const char* info = ""
  69 "nn [options...] [filepaths...]\n"
  70 "\n"
  71 "\n"
  72 "Nice Numbers is an app which renders the plain text it's given to make long\n"
  73 "numbers much easier to read, by alternating 3-digit groups which are colored\n"
  74 "using ANSI-codes with unstyled ones.\n"
  75 "\n"
  76 "Unlike the common practice of inserting commas between 3-digit groups, this\n"
  77 "alternative doesn't widen the original text, keeping any alignments the same.\n"
  78 "\n"
  79 "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
  80 "from the standard input.\n"
  81 "\n"
  82 "\n"
  83 "Options, all of which can start with either 1 or 2 dashes:\n"
  84 "\n"
  85 "\n"
  86 "  -blue     use a blue-like color to alternate-style runs of digits\n"
  87 "  -bold     use a bold style/effect to alternate-style runs of digits\n"
  88 "  -gray     use a gray color to alternate-style runs of digits\n"
  89 "  -green    use a green color to alternate-style runs of digits\n"
  90 "  -inverse  invert/swap colors to alternate-style runs of digits\n"
  91 "  -orange   use an orange color to alternate-style runs of digits\n"
  92 "  -purple   use a purple color to alternate-style runs of digits\n"
  93 "  -red      use a red color to alternate-style runs of digits\n"
  94 "\n"
  95 "  -h          show this help message\n"
  96 "  -help       show this help message\n"
  97 "\n"
  98 "  -highlight  same as option -inverse\n"
  99 "  -hilite     same as option -inverse\n"
 100 "";
 101 
// span is a region of bytes in memory; it only refers to the bytes, and
// nothing in its definition says who owns them
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;
 110 
 111 // advance updates a span so it starts after the number of bytes given
 112 static inline void advance(span* src, size_t n) {
 113     src->ptr += n;
 114     src->len -= n;
 115 }
 116 
// slice is a growable region of bytes in memory; in this file it's the
// buffer/capacity pair which getline reads into and may reallocate
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;
 125 
 126 // find_digit returns the index of the first digit found, or a negative value
 127 // on failure
 128 static inline int64_t find_digit(span s) {
 129     for (size_t i = 0; i < s.len; i++) {
 130         const unsigned char b = s.ptr[i];
 131         if ('0' <= b && b <= '9') {
 132             return i;
 133         }
 134     }
 135     return -1;
 136 }
 137 
 138 // find_non_digit returns the index of the first non-digit found, or a negative
 139 // value on failure
 140 static inline int64_t find_non_digit(span s) {
 141     for (size_t i = 0; i < s.len; i++) {
 142         const unsigned char b = s.ptr[i];
 143         if (b < '0' || b > '9') {
 144             return i;
 145         }
 146     }
 147     return -1;
 148 }
 149 
 150 // restyle_digits renders a run of digits as alternating styled/unstyled runs
 151 // of 3 digits, which greatly improves readability, and is the only purpose
 152 // of this app; string is assumed to be all decimal digits
 153 void restyle_digits(FILE* w, span digits, span style) {
 154     if (digits.len < 4) {
 155         // digit sequence is short, so emit it as is
 156         fwrite(digits.ptr, 1, digits.len, w);
 157         return;
 158     }
 159 
 160     // separate leading 0..2 digits which don't align with the 3-digit groups
 161     size_t lead = digits.len % 3;
 162     // emit leading digits unstyled, if there are any
 163     fwrite(digits.ptr, 1, lead, w);
 164     // the rest is guaranteed to have a length which is a multiple of 3
 165     advance(&digits, lead);
 166 
 167     // start with the alternate style, unless there were no leading digits
 168     bool style_now = lead != 0;
 169 
 170     while (digits.len > 0) {
 171         if (style_now) {
 172             fwrite(style.ptr, 1, style.len, w);
 173             fwrite(digits.ptr, 1, 3, w);
 174             EMIT_CONST(w, "\x1b[0m");
 175         } else {
 176             fwrite(digits.ptr, 1, 3, w);
 177         }
 178 
 179         advance(&digits, 3);
 180         // alternate between styled and unstyled 3-digit groups
 181         style_now = !style_now;
 182     }
 183 }
 184 
 185 // restyle_line renders the line given, using ANSI-styles to make any long
 186 // numbers in it more legible
 187 void restyle_line(FILE* w, unsigned char* s, size_t len, span style) {
 188     span line;
 189     line.ptr = s;
 190     line.len = len;
 191 
 192     while (!feof(w) && line.len > 0) {
 193         int64_t i = find_digit(line);
 194         if (i < 0) {
 195             // no (more) digits for sure
 196             fwrite(line.ptr, 1, line.len, w);
 197             return;
 198         }
 199 
 200         // some ANSI-style sequences use 4-digit numbers, which are long
 201         // enough for this app to mangle
 202         bool is_ansi = i >= 2 && s[i - 2] == '\x1b' && s[i - 1] == '[';
 203 
 204         // emit line before current digit-run
 205         fwrite(line.ptr, 1, i, w);
 206 
 207         advance(&line, i);
 208 
 209         // see where the digit-run ends
 210         int64_t j = find_non_digit(line);
 211         if (j < 0) {
 212             // the digit-run goes until the end
 213             if (!is_ansi) {
 214                 restyle_digits(w, line, style);
 215             } else {
 216                 fwrite(line.ptr, 1, line.len, w);
 217             }
 218             return;
 219         }
 220 
 221         // emit styled digit-run... maybe
 222         if (!is_ansi) {
 223             span chunk;
 224             chunk.ptr = line.ptr;
 225             chunk.len = j;
 226             restyle_digits(w, chunk, style);
 227         } else {
 228             fwrite(line.ptr, 1, j, w);
 229         }
 230 
 231         // skip right past the end of the digit-run
 232         advance(&line, j);
 233     }
 234 }
 235 
 236 // default_digits_style makes it easy to change the built-in default style
 237 #ifdef COMPACT_OUTPUT
 238 unsigned char default_digits_style[] = "\x1b[38;5;248m";
 239 #else
 240 unsigned char default_digits_style[] = "\x1b[38;2;168;168;168m";
 241 #endif
 242 
// handler_args bundles what the input-handling funcs need to do their job
typedef struct handler_args {
    // w is where restyled lines are written
    FILE* w;

    // line points to the buffer which lines are read into: it's reused (and
    // possibly reallocated by getline) across all lines and inputs
    slice* line;

    // style is the ANSI code used to style alternate 3-digit groups
    span style;
} handler_args;
 248 
 249 bool starts_with_bom(const unsigned char* p, size_t len) {
 250     return len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 251 }
 252 
 253 // handle_lines loops over input lines, restyling all digit-runs as more
 254 // readable `nice numbers`, fulfilling the app's purpose
 255 void handle_lines(handler_args args, FILE* src, bool live_lines) {
 256     FILE* w = args.w;
 257     slice* line = args.line;
 258 
 259     for (size_t i = 0; !feof(w); i++) {
 260         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 261         if (line->ptr == NULL) {
 262             fprintf(stderr, "\n");
 263             fprintf(stderr, ERROR_LINE("out of memory"));
 264             exit(BAD_ALLOC);
 265         }
 266 
 267         if (len < 0) {
 268             break;
 269         }
 270 
 271         unsigned char* ptr = line->ptr;
 272 
 273         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 274         if (i == 0 && starts_with_bom(ptr, len)) {
 275             ptr += 3;
 276             len -= 3;
 277         }
 278 
 279         // replace trailing carriage-returns with line-feeds
 280         if (len >= 1 && ptr[len - 1] == '\r') {
 281             ptr[len - 1] = '\n';
 282         }
 283 
 284         // get rid of carriage-returns preceding line-feeds
 285         if (len >= 2 && ptr[len - 2] == '\r' && ptr[len - 1] == '\n') {
 286             ptr[len - 2] = '\n';
 287             len--;
 288         }
 289 
 290         restyle_line(w, ptr, len, args.style);
 291         if (len < 1 || ptr[len - 1] != '\n') {
 292             fputc('\n', w);
 293         }
 294     }
 295 
 296     if (!live_lines) {
 297         fflush(w);
 298     }
 299 }
 300 
 301 // handle_file handles data from the filename given; returns false only when
 302 // the file can't be opened
 303 bool handle_file(handler_args args, const char* path, bool live_lines) {
 304     FILE* f = fopen(path, "rb");
 305     if (f == NULL) {
 306         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 307         return false;
 308     }
 309 
 310     handle_lines(args, f, live_lines);
 311     fclose(f);
 312     return true;
 313 }
 314 
// style_names_aliases is a flat list of pairs: each even-index item is an
// alias, immediately followed by the style name it stands for; names are
// given without any leading dashes, and the names aliases resolve to are
// looked up in the `styles` table
const char* style_names_aliases[] = {
    "b", "blue",
    "g", "green",
    "h", "inverse",
    "i", "inverse",
    "m", "magenta",
    "o", "orange",
    "p", "purple",
    "r", "red",
    "u", "underline",

    "hi", "inverse",
    "ma", "magenta",
    "or", "orange",
    "un", "underline",

    "inv", "inverse",
    "mag", "magenta",

    "grey", "gray",
    "highlight", "inverse",
    "highlighted", "inverse",
    "hilite", "inverse",
    "hilited", "inverse",
    "invert", "inverse",
    "inverted", "inverse",
    "underlined", "underline",

    // aliases for styles which change the background color
    "bb", "blueback",
    "gb", "greenback",
    "mb", "magentaback",
    "ob", "orangeback",
    "pb", "purpleback",
    "rb", "redback",

    "greyback", "grayback",
};
 352 
 353 #ifdef COMPACT_OUTPUT
 354 char* styles[] = {
 355     "blue", "\x1b[38;5;26m",
 356     "bold", "\x1b[1m",
 357     "gray", "\x1b[38;5;248m",
 358     "green", "\x1b[38;5;29m",
 359     "inverse", "\x1b[7m",
 360     "magenta", "\x1b[38;5;165m",
 361     "orange", "\x1b[38;5;166m",
 362     "purple", "\x1b[38;5;99m",
 363     "red", "\x1b[38;5;1m",
 364     "underline", "\x1b[4m",
 365 
 366     "blueback", "\x1b[48;5;26m\x1b[38;5;15m",
 367     "grayback", "\x1b[48;5;248m\x1b[38;5;15m",
 368     "greenback", "\x1b[48;5;29m\x1b[38;5;15m",
 369     "magentaback", "\x1b[48;5;165m\x1b[38;5;15m",
 370     "orangeback", "\x1b[48;5;166m\x1b[38;5;15m",
 371     "purpleback", "\x1b[48;5;99m\x1b[38;5;15m",
 372     "redback", "\x1b[48;5;1m\x1b[38;5;15m",
 373 };
 374 #else
 375 char* styles[] = {
 376     "blue", "\x1b[38;2;0;95;215m",
 377     "bold", "\x1b[1m",
 378     "gray", "\x1b[38;2;168;168;168m",
 379     "green", "\x1b[38;2;0;135;95m",
 380     "inverse", "\x1b[7m",
 381     "magenta", "\x1b[38;2;215;0;255m",
 382     "orange", "\x1b[38;2;215;95;0m",
 383     "purple", "\x1b[38;2;135;95;255m",
 384     "red", "\x1b[38;2;204;0;0m",
 385     "underline", "\x1b[4m",
 386 
 387     "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
 388     "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
 389     "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
 390     "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
 391     "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
 392     "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
 393     "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
 394 };
 395 #endif
 396 
 397 bool change_style(const char* arg, span* style) {
 398     // style-changing options must have 1 or 2 leading dashes
 399     if (arg[0] != '-') {
 400         return false;
 401     }
 402 
 403     // skip up to 2 leading dashes
 404     const char* s = arg + (arg[1] == '-' ? 2 : 1);
 405 
 406     // resolve style-name aliases
 407     const size_t n = sizeof(style_names_aliases) / sizeof(char*);
 408     for (size_t i = 0; i < n; i += 2) {
 409         if (strcmp(s, style_names_aliases[i]) == 0) {
 410             s = style_names_aliases[i + 1];
 411             break;
 412         }
 413     }
 414 
 415     // try to find ANSI-code for the style-name given
 416     for (size_t i = 0; i < sizeof(styles) / sizeof(char*); i += 2) {
 417         if (strcmp(s, styles[i]) == 0) {
 418             style->ptr = (unsigned char*)styles[i + 1];
 419             style->len = strlen(styles[i + 1]);
 420             return true;
 421         }
 422     }
 423 
 424     return false;
 425 }
 426 
 427 // run returns the number of errors
 428 int run(char** args, size_t nargs, FILE* w, bool live_lines) {
 429     size_t dashes = 0;
 430     for (int i = 0; i < nargs; i++) {
 431         if (strcmp(args[i], "-") == 0) {
 432             dashes++;
 433         }
 434     }
 435 
 436     if (dashes > 1) {
 437         const char* m = "can't use the standard input (dash) more than once";
 438         fprintf(stderr, ERROR_LINE("%s"), m);
 439         return 1;
 440     }
 441 
 442     size_t files = 0;
 443     size_t errors = 0;
 444 
 445     slice line;
 446     line.cap = 32 * 1024;
 447     line.ptr = malloc(line.cap);
 448 
 449     if (line.ptr == NULL) {
 450         fprintf(stderr, ERROR_LINE("out of memory"));
 451         exit(BAD_ALLOC);
 452     }
 453 
 454     handler_args ha;
 455     ha.w = w;
 456     ha.line = &line;
 457     ha.style.ptr = default_digits_style;
 458     ha.style.len = strlen((char*)default_digits_style);
 459 
 460     bool options = true;
 461 
 462     for (size_t i = 0; i < nargs && !feof(w); i++) {
 463         const char* arg = args[i];
 464 
 465         // `--` means no more options
 466         if (arg[0] == '-' && arg[1] == '-' && arg[2] == 0) {
 467             options = false;
 468             continue;
 469         }
 470 
 471         // `-` means standard input
 472         if (arg[0] == '-' && arg[1] == 0) {
 473             handle_lines(ha, stdin, live_lines);
 474             files++;
 475             continue;
 476         }
 477 
 478         if (options && arg[0] == '-') {
 479             if (!change_style(arg, &ha.style)) {
 480                 fprintf(stderr, ERROR_LINE("unsupported style named %s"), arg);
 481                 errors++;
 482             }
 483             continue;
 484         }
 485 
 486         if (!handle_file(ha, arg, live_lines)) {
 487             errors++;
 488         }
 489         files++;
 490     }
 491 
 492     // use stdin when not given any filepaths
 493     if (files == 0 && !feof(w)) {
 494         handle_lines(ha, stdin, live_lines);
 495     }
 496 
 497     free(line.ptr);
 498     return errors;
 499 }
 500 
 501 int main(int argc, char** argv) {
 502 #ifdef _WIN32
 503     setmode(fileno(stdin), O_BINARY);
 504     // ensure output lines end in LF instead of CRLF on windows
 505     setmode(fileno(stdout), O_BINARY);
 506     setmode(fileno(stderr), O_BINARY);
 507 #endif
 508 
 509     if (argc > 1) {
 510         if (
 511             strcmp(argv[1], "-h") == 0 ||
 512             strcmp(argv[1], "-help") == 0 ||
 513             strcmp(argv[1], "--h") == 0 ||
 514             strcmp(argv[1], "--help") == 0
 515         ) {
 516             fprintf(stdout, "%s", info);
 517             return 0;
 518         }
 519     }
 520 
 521     size_t nargs = argc - 1;
 522     char** args = argv + 1;
 523     bool buffered = false;
 524 
 525     if (nargs > 0) {
 526         if (
 527             strcmp(args[0], "-buffered") == 0 ||
 528             strcmp(args[0], "--buffered") == 0
 529         ) {
 530             buffered = true;
 531             nargs--;
 532             args++;
 533         }
 534     }
 535 
 536     if (nargs > 0 && strcmp(args[0], "--") == 0) {
 537         nargs--;
 538         args++;
 539     }
 540 
 541     const int fd = fileno(stdout);
 542     const bool live_lines = !buffered && lseek(fd, 0, SEEK_CUR) != 0;
 543     if (live_lines) {
 544         setvbuf(stdout, NULL, _IOLBF, 0);
 545     } else {
 546         setvbuf(stdout, NULL, _IOFBF, 0);
 547     }
 548     return run(args, nargs, stdout, live_lines) == 0 ? 0 : 1;
 549 }