File: nn.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./nn ./nn.c
  29 
  30 Building with COMPACT_OUTPUT defined makes `nn` output many fewer bytes, at
  31 the cost of using arguably worse colors. You can do that by running
  32 
cc -s -O2 -march=native -mtune=native -flto -D COMPACT_OUTPUT -o ./nn ./nn.c
  34 */
  35 
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
  42 
  43 #ifdef _WIN32
  44 #include <fcntl.h>
  45 #include <windows.h>
  46 #endif
  47 
  48 #ifdef RED_ERRORS
  49 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  50 #ifdef __APPLE__
  51 #define ERROR_STYLE "\x1b[31m"
  52 #endif
  53 #define RESET_STYLE "\x1b[0m"
  54 #else
  55 #define ERROR_STYLE
  56 #define RESET_STYLE
  57 #endif
  58 
  59 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  60 
// BAD_ALLOC is the exit code the app quits with when it runs out of memory
#define BAD_ALLOC 2
  62 
// uncomment the line below to enable COMPACT_OUTPUT without using a -D flag
// #define COMPACT_OUTPUT
  64 
  65 // EMIT_CONST emits string constants without their final null byte
  66 #define EMIT_CONST(w, x) fwrite(x, 1, sizeof(x) - 1, w)
  67 
  68 const char* info = ""
  69 "nn [options...] [filepaths...]\n"
  70 "\n"
  71 "\n"
  72 "Nice Numbers is an app which renders the plain text it's given to make long\n"
  73 "numbers much easier to read, by alternating 3-digit groups which are colored\n"
  74 "using ANSI-codes with unstyled ones.\n"
  75 "\n"
  76 "Unlike the common practice of inserting commas between 3-digit groups, this\n"
  77 "alternative doesn't widen the original text, keeping any alignments the same.\n"
  78 "\n"
  79 "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
  80 "from the standard input.\n"
  81 "\n"
  82 "\n"
  83 "Options, all of which can start with either 1 or 2 dashes:\n"
  84 "\n"
  85 "\n"
  86 "  -blue     use a blue-like color to alternate-style runs of digits\n"
  87 "  -bold     use a bold style/effect to alternate-style runs of digits\n"
  88 "  -gray     use a gray color to alternate-style runs of digits\n"
  89 "  -green    use a green color to alternate-style runs of digits\n"
  90 "  -inverse  invert/swap colors to alternate-style runs of digits\n"
  91 "  -orange   use an orange color to alternate-style runs of digits\n"
  92 "  -purple   use a purple color to alternate-style runs of digits\n"
  93 "  -red      use a red color to alternate-style runs of digits\n"
  94 "\n"
  95 "  -h          show this help message\n"
  96 "  -help       show this help message\n"
  97 "\n"
  98 "  -highlight  same as option -inverse\n"
  99 "  -hilite     same as option -inverse\n"
 100 "";
 101 
 102 // span is a region of bytes in memory
 103 typedef struct span {
 104     // ptr is the starting place of the region
 105     unsigned char* ptr;
 106 
 107     // len is how many bytes are in the region
 108     size_t len;
 109 } span;
 110 
 111 // advance updates a span so it starts after the number of bytes given
 112 static inline void advance(span* src, size_t n) {
 113     src->ptr += n;
 114     src->len -= n;
 115 }
 116 
 117 // slice is a growable region of bytes in memory
 118 typedef struct slice {
 119     // ptr is the starting place of the region
 120     unsigned char* ptr;
 121 
 122     // cap is how many bytes the memory region has available
 123     size_t cap;
 124 } slice;
 125 
 126 // find_digit returns the index of the first digit found, or a negative value
 127 // on failure
 128 static inline int64_t find_digit(span s) {
 129     for (size_t i = 0; i < s.len; i++) {
 130         const unsigned char b = s.ptr[i];
 131         if ('0' <= b && b <= '9') {
 132             return i;
 133         }
 134     }
 135     return -1;
 136 }
 137 
 138 // find_non_digit returns the index of the first non-digit found, or a negative
 139 // value on failure
 140 static inline int64_t find_non_digit(span s) {
 141     for (size_t i = 0; i < s.len; i++) {
 142         const unsigned char b = s.ptr[i];
 143         if (b < '0' || b > '9') {
 144             return i;
 145         }
 146     }
 147     return -1;
 148 }
 149 
 150 // restyle_digits renders a run of digits as alternating styled/unstyled runs
 151 // of 3 digits, which greatly improves readability, and is the only purpose
 152 // of this app; string is assumed to be all decimal digits
 153 void restyle_digits(FILE* w, span digits, span style) {
 154     if (digits.len < 4) {
 155         // digit sequence is short, so emit it as is
 156         fwrite(digits.ptr, 1, digits.len, w);
 157         return;
 158     }
 159 
 160     // separate leading 0..2 digits which don't align with the 3-digit groups
 161     size_t lead = digits.len % 3;
 162     // emit leading digits unstyled, if there are any
 163     fwrite(digits.ptr, 1, lead, w);
 164     // the rest is guaranteed to have a length which is a multiple of 3
 165     advance(&digits, lead);
 166 
 167     // start with the alternate style, unless there were no leading digits
 168     bool style_now = lead != 0;
 169 
 170     while (digits.len > 0) {
 171         if (style_now) {
 172             fwrite(style.ptr, 1, style.len, w);
 173             fwrite(digits.ptr, 1, 3, w);
 174             EMIT_CONST(w, "\x1b[0m");
 175         } else {
 176             fwrite(digits.ptr, 1, 3, w);
 177         }
 178 
 179         advance(&digits, 3);
 180         // alternate between styled and unstyled 3-digit groups
 181         style_now = !style_now;
 182     }
 183 }
 184 
 185 // restyle_line renders the line given, using ANSI-styles to make any long
 186 // numbers in it more legible
 187 void restyle_line(FILE* w, unsigned char* s, size_t len, span style) {
 188     span line;
 189     line.ptr = s;
 190     line.len = len;
 191 
 192     while (!feof(w) && line.len > 0) {
 193         int64_t i = find_digit(line);
 194         if (i < 0) {
 195             // no (more) digits for sure
 196             fwrite(line.ptr, 1, line.len, w);
 197             return;
 198         }
 199 
 200         // some ANSI-style sequences use 4-digit numbers, which are long
 201         // enough for this app to mangle
 202         bool is_ansi = i >= 2 && s[i - 2] == '\x1b' && s[i - 1] == '[';
 203 
 204         // emit line before current digit-run
 205         fwrite(line.ptr, 1, i, w);
 206 
 207         advance(&line, i);
 208 
 209         // see where the digit-run ends
 210         int64_t j = find_non_digit(line);
 211         if (j < 0) {
 212             // the digit-run goes until the end
 213             if (!is_ansi) {
 214                 restyle_digits(w, line, style);
 215             } else {
 216                 fwrite(line.ptr, 1, line.len, w);
 217             }
 218             return;
 219         }
 220 
 221         // emit styled digit-run... maybe
 222         if (!is_ansi) {
 223             span chunk;
 224             chunk.ptr = line.ptr;
 225             chunk.len = j;
 226             restyle_digits(w, chunk, style);
 227         } else {
 228             fwrite(line.ptr, 1, j, w);
 229         }
 230 
 231         // skip right past the end of the digit-run
 232         advance(&line, j);
 233     }
 234 }
 235 
 236 // default_digits_style makes it easy to change the built-in default style
 237 #ifdef COMPACT_OUTPUT
 238 unsigned char default_digits_style[] = "\x1b[38;5;248m";
 239 #else
 240 unsigned char default_digits_style[] = "\x1b[38;2;168;168;168m";
 241 #endif
 242 
 243 typedef struct handler_args {
 244     FILE* w;
 245     slice* line;
 246     span style;
 247 } handler_args;
 248 
 249 bool starts_with_bom(const unsigned char* p, size_t len) {
 250     return len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 251 }
 252 
 253 // handle_lines loops over input lines, restyling all digit-runs as more
 254 // readable `nice numbers`, fulfilling the app's purpose
 255 void handle_lines(handler_args args, FILE* src, bool live_lines) {
 256     FILE* w = args.w;
 257     slice* line = args.line;
 258 
 259     for (size_t i = 0; !feof(w); i++) {
 260         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 261         if (line->ptr == NULL) {
 262             fprintf(stderr, "\n");
 263             fprintf(stderr, ERROR_LINE("out of memory"));
 264             exit(BAD_ALLOC);
 265         }
 266 
 267         if (len < 0) {
 268             break;
 269         }
 270 
 271         unsigned char* ptr = line->ptr;
 272 
 273         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 274         if (i == 0 && starts_with_bom(ptr, len)) {
 275             ptr += 3;
 276             len -= 3;
 277         }
 278 
 279         // replace trailing carriage-returns with line-feeds
 280         if (len >= 1 && ptr[len - 1] == '\r') {
 281             ptr[len - 1] = '\n';
 282         }
 283 
 284         // get rid of carriage-returns preceding line-feeds
 285         if (len >= 2 && ptr[len - 2] == '\r' && ptr[len - 1] == '\n') {
 286             ptr[len - 2] = '\n';
 287             len--;
 288         }
 289 
 290         restyle_line(w, ptr, len, args.style);
 291         if (len < 1 || ptr[len - 1] != '\n') {
 292             fputc('\n', w);
 293         }
 294     }
 295 
 296     if (!live_lines) {
 297         fflush(w);
 298     }
 299 }
 300 
 301 // handle_file handles data from the filename given; returns false only when
 302 // the file can't be opened
 303 bool handle_file(handler_args args, const char* path, bool live_lines) {
 304     FILE* f = fopen(path, "rb");
 305     if (f == NULL) {
 306         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 307         return false;
 308     }
 309 
 310     handle_lines(args, f, live_lines);
 311     fclose(f);
 312     return true;
 313 }
 314 
// style_names_aliases pairs each alias (at an even index) with the style
// name it stands for (at the odd index right after it); func change_style
// looks aliases up in order, and stops at the first match
const char* style_names_aliases[] = {
    // 1-letter aliases
    "b", "blue",
    "g", "green",
    "h", "inverse",
    "i", "inverse",
    "m", "magenta",
    "o", "orange",
    "p", "purple",
    "r", "red",
    "u", "underline",

    // 2-letter aliases
    "hi", "inverse",
    "ma", "magenta",
    "or", "orange",
    "un", "underline",

    // 3-letter aliases
    "inv", "inverse",
    "mag", "magenta",

    // alternative spellings and word-forms
    "grey", "gray",
    "highlight", "inverse",
    "highlighted", "inverse",
    "hilite", "inverse",
    "hilited", "inverse",
    "invert", "inverse",
    "inverted", "inverse",
    "underlined", "underline",

    // short aliases for the background-color styles
    "bb", "blueback",
    "gb", "greenback",
    "mb", "magentaback",
    "ob", "orangeback",
    "pb", "purpleback",
    "rb", "redback",

    // alternative spelling for a background-color style
    "greyback", "grayback",
};
 352 
 353 #ifdef COMPACT_OUTPUT
 354 char* styles[] = {
 355     "blue", "\x1b[38;5;26m",
 356     "bold", "\x1b[1m",
 357     "gray", "\x1b[38;5;248m",
 358     "green", "\x1b[38;5;29m",
 359     "inverse", "\x1b[7m",
 360     "magenta", "\x1b[38;5;165m",
 361     "orange", "\x1b[38;5;166m",
 362     "purple", "\x1b[38;5;99m",
 363     "red", "\x1b[38;5;1m",
 364     "underline", "\x1b[4m",
 365 
 366     "blueback", "\x1b[48;5;26m\x1b[38;5;15m",
 367     "grayback", "\x1b[48;5;248m\x1b[38;5;15m",
 368     "greenback", "\x1b[48;5;29m\x1b[38;5;15m",
 369     "magentaback", "\x1b[48;5;165m\x1b[38;5;15m",
 370     "orangeback", "\x1b[48;5;166m\x1b[38;5;15m",
 371     "purpleback", "\x1b[48;5;99m\x1b[38;5;15m",
 372     "redback", "\x1b[48;5;1m\x1b[38;5;15m",
 373 };
 374 #else
 375 char* styles[] = {
 376     "blue", "\x1b[38;2;0;95;215m",
 377     "bold", "\x1b[1m",
 378     "gray", "\x1b[38;2;168;168;168m",
 379     "green", "\x1b[38;2;0;135;95m",
 380     "inverse", "\x1b[7m",
 381     "magenta", "\x1b[38;2;215;0;255m",
 382     "orange", "\x1b[38;2;215;95;0m",
 383     "purple", "\x1b[38;2;135;95;255m",
 384     "red", "\x1b[38;2;204;0;0m",
 385     "underline", "\x1b[4m",
 386 
 387     "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
 388     "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
 389     "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
 390     "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
 391     "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
 392     "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
 393     "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
 394 };
 395 #endif
 396 
 397 bool change_style(const char* arg, span* style) {
 398     // style-changing options must have 1 or 2 leading dashes
 399     if (arg[0] != '-') {
 400         return false;
 401     }
 402 
 403     // skip up to 2 leading dashes
 404     const char* s = arg + (arg[1] == '-' ? 2 : 1);
 405 
 406     // resolve style-name aliases
 407     const size_t n = sizeof(style_names_aliases) / sizeof(char*);
 408     for (size_t i = 0; i < n; i += 2) {
 409         if (strcmp(s, style_names_aliases[i]) == 0) {
 410             s = style_names_aliases[i + 1];
 411             break;
 412         }
 413     }
 414 
 415     // try to find ANSI-code for the style-name given
 416     for (size_t i = 0; i < sizeof(styles) / sizeof(char*); i += 2) {
 417         if (strcmp(s, styles[i]) == 0) {
 418             style->ptr = (unsigned char*)styles[i + 1];
 419             style->len = strlen(styles[i + 1]);
 420             return true;
 421         }
 422     }
 423 
 424     return false;
 425 }
 426 
 427 // run returns the number of errors
 428 int run(int argc, char** argv, FILE* w, bool live_lines) {
 429     size_t files = 0;
 430     size_t errors = 0;
 431 
 432     slice line;
 433     line.cap = 32 * 1024;
 434     line.ptr = malloc(line.cap);
 435 
 436     if (line.ptr == NULL) {
 437         fprintf(stderr, ERROR_LINE("out of memory"));
 438         exit(BAD_ALLOC);
 439     }
 440 
 441     handler_args args;
 442     args.w = w;
 443     args.line = &line;
 444     args.style.ptr = default_digits_style;
 445     args.style.len = strlen((char*)default_digits_style);
 446 
 447     for (size_t i = 1; i < (size_t)argc && !feof(w); i++) {
 448         const char* arg = argv[i];
 449 
 450         // `-` means standard input
 451         if (arg[0] == '-' && arg[1] == 0) {
 452             handle_lines(args, stdin, live_lines);
 453             files++;
 454             continue;
 455         }
 456 
 457         if (arg[0] == '-') {
 458             if (!change_style(arg, &args.style)) {
 459                 fprintf(stderr, ERROR_LINE("unsupported style named %s"), arg);
 460                 errors++;
 461             }
 462             continue;
 463         }
 464 
 465         if (!handle_file(args, arg, live_lines)) {
 466             errors++;
 467         }
 468         files++;
 469     }
 470 
 471     // use stdin when not given any filepaths
 472     if (files == 0) {
 473         handle_lines(args, stdin, live_lines);
 474     }
 475 
 476     free(line.ptr);
 477     return errors;
 478 }
 479 
 480 // is_help_option simplifies control-flow for func main
 481 bool is_help_option(const char* s) {
 482     return (s[0] == '-') && (
 483         strcmp(s, "-h") == 0 ||
 484         strcmp(s, "-help") == 0 ||
 485         strcmp(s, "--h") == 0 ||
 486         strcmp(s, "--help") == 0
 487     );
 488 }
 489 
 490 int main(int argc, char** argv) {
 491 #ifdef _WIN32
 492     setmode(fileno(stdin), O_BINARY);
 493     // ensure output lines end in LF instead of CRLF on windows
 494     setmode(fileno(stdout), O_BINARY);
 495     setmode(fileno(stderr), O_BINARY);
 496 #endif
 497 
 498     // handle any of the help options, if given
 499     if (argc > 1 && is_help_option(argv[1])) {
 500         printf("%s", info);
 501         return 0;
 502     }
 503 
 504     const bool live_lines = lseek(fileno(stdout), 0, SEEK_CUR) != 0;
 505     if (live_lines) {
 506         setvbuf(stdout, NULL, _IOLBF, 0);
 507     } else {
 508         setvbuf(stdout, NULL, _IOFBF, 0);
 509     }
 510     return run(argc, argv, stdout, live_lines) == 0 ? 0 : 1;
 511 }