File: nn.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./nn ./nn.c
  29 
  30 Building with COMPACT_OUTPUT defined makes `nn` output many fewer bytes, at
  31 the cost of using arguably worse colors. You can do that by running
  32 
  33 cc -s -O3 -march=native -mtune=native -flto -D COMPACT_OUTPUT -o ./nn ./nn.c
  34 */
  35 
  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>

  #ifdef _WIN32
  #include <fcntl.h>
  #include <windows.h>
  #endif
  47 
  // ERROR_STYLE and RESET_STYLE are the ANSI-codes put around error messages:
  // both are empty unless RED_ERRORS is defined when building; each macro is
  // defined exactly once per configuration, since redefining a macro with a
  // different body isn't valid C
  #ifdef RED_ERRORS
  #ifdef __APPLE__
  // apple builds use the basic 4-bit red, instead of the 24-bit one
  #define ERROR_STYLE "\x1b[31m"
  #else
  #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  #endif
  #define RESET_STYLE "\x1b[0m"
  #else
  #define ERROR_STYLE
  #define RESET_STYLE
  #endif

  // ERROR_LINE turns a string literal into a full (possibly styled) line, by
  // relying on adjacent string-literal concatenation; MSG must be a literal
  #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  60 
  // BAD_ALLOC is the exit code used when memory can't be allocated
  #define BAD_ALLOC 2

  // uncomment the next line to force the shorter 256-color ANSI-codes, same
  // as building with -D COMPACT_OUTPUT
  // #define COMPACT_OUTPUT

  // EMIT_CONST emits string constants without their final null byte; w is the
  // FILE* to write to, and x must be a string literal (or a char array), since
  // its length comes from sizeof; the result is what fwrite returns
  #define EMIT_CONST(w, x) fwrite((x), 1, sizeof(x) - 1, (w))
  67 
  // info is the help message, which func main shows when the first argument
  // is one of the help options; blank lines in the message are folded into the
  // literals right before them
  const char* info =
      "nn [options...] [filepaths...]\n\n\n"
      "Nice Numbers is an app which renders the plain text it's given to make long\n"
      "numbers much easier to read, by alternating 3-digit groups which are colored\n"
      "using ANSI-codes with unstyled ones.\n\n"
      "Unlike the common practice of inserting commas between 3-digit groups, this\n"
      "alternative doesn't widen the original text, keeping any alignments the same.\n\n"
      "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
      "from the standard input.\n\n\n"
      "Options, all of which can start with either 1 or 2 dashes:\n\n\n"
      "  -blue     use a blue-like color to alternate-style runs of digits\n"
      "  -bold     use a bold style/effect to alternate-style runs of digits\n"
      "  -gray     use a gray color to alternate-style runs of digits\n"
      "  -green    use a green color to alternate-style runs of digits\n"
      "  -inverse  invert/swap colors to alternate-style runs of digits\n"
      "  -orange   use an orange color to alternate-style runs of digits\n"
      "  -purple   use a purple color to alternate-style runs of digits\n"
      "  -red      use a red color to alternate-style runs of digits\n\n"
      "  -h          show this help message\n"
      "  -help       show this help message\n\n"
      "  -highlight  same as option -inverse\n"
      "  -hilite     same as option -inverse\n";
 101 
 // span describes a region of bytes in memory, as a start address along with
 // a byte-count
 struct span {
     // ptr points at the first byte of the region
     unsigned char* ptr;

     // len is the region's size in bytes
     size_t len;
 };

 typedef struct span span;
 110 
 111 // advance updates a span so it starts after the number of bytes given
 112 static inline void advance(span* src, size_t n) {
 113     src->ptr += n;
 114     src->len -= n;
 115 }
 116 
 // slice describes a growable buffer of bytes, as a start address along with
 // its current capacity
 struct slice {
     // ptr points at the first byte of the buffer
     unsigned char* ptr;

     // cap is how many bytes the buffer currently has room for
     size_t cap;
 };

 typedef struct slice slice;
 125 
 126 // find_digit returns the index of the first digit found, or a negative value
 127 // on failure
 128 static inline int64_t find_digit(span s) {
 129     for (size_t i = 0; i < s.len; i++) {
 130         const unsigned char b = s.ptr[i];
 131         if ('0' <= b && b <= '9') {
 132             return i;
 133         }
 134     }
 135     return -1;
 136 }
 137 
 138 // find_non_digit returns the index of the first non-digit found, or a negative
 139 // value on failure
 140 static inline int64_t find_non_digit(span s) {
 141     for (size_t i = 0; i < s.len; i++) {
 142         const unsigned char b = s.ptr[i];
 143         if (b < '0' || b > '9') {
 144             return i;
 145         }
 146     }
 147     return -1;
 148 }
 149 
 150 // restyle_digits renders a run of digits as alternating styled/unstyled runs
 151 // of 3 digits, which greatly improves readability, and is the only purpose
 152 // of this app; string is assumed to be all decimal digits
 153 void restyle_digits(FILE* w, span digits, span style) {
 154     if (digits.len < 4) {
 155         // digit sequence is short, so emit it as is
 156         fwrite(digits.ptr, 1, digits.len, w);
 157         return;
 158     }
 159 
 160     // separate leading 0..2 digits which don't align with the 3-digit groups
 161     size_t lead = digits.len % 3;
 162     // emit leading digits unstyled, if there are any
 163     fwrite(digits.ptr, 1, lead, w);
 164     // the rest is guaranteed to have a length which is a multiple of 3
 165     advance(&digits, lead);
 166 
 167     // start with the alternate style, unless there were no leading digits
 168     bool style_now = lead != 0;
 169 
 170     while (digits.len > 0) {
 171         if (style_now) {
 172             fwrite(style.ptr, 1, style.len, w);
 173             fwrite(digits.ptr, 1, 3, w);
 174             EMIT_CONST(w, "\x1b[0m");
 175         } else {
 176             fwrite(digits.ptr, 1, 3, w);
 177         }
 178 
 179         advance(&digits, 3);
 180         // alternate between styled and unstyled 3-digit groups
 181         style_now = !style_now;
 182     }
 183 }
 184 
 185 // restyle_line renders the line given, using ANSI-styles to make any long
 186 // numbers in it more legible
 187 void restyle_line(FILE* w, unsigned char* s, size_t len, span style) {
 188     span line;
 189     line.ptr = s;
 190     line.len = len;
 191 
 192     while (!feof(w) && line.len > 0) {
 193         int64_t i = find_digit(line);
 194         if (i < 0) {
 195             // no (more) digits for sure
 196             fwrite(line.ptr, 1, line.len, w);
 197             return;
 198         }
 199 
 200         // some ANSI-style sequences use 4-digit numbers, which are long
 201         // enough for this app to mangle
 202         bool is_ansi = i >= 2 && s[i - 2] == '\x1b' && s[i - 1] == '[';
 203 
 204         // emit line before current digit-run
 205         fwrite(line.ptr, 1, i, w);
 206 
 207         advance(&line, i);
 208 
 209         // see where the digit-run ends
 210         int64_t j = find_non_digit(line);
 211         if (j < 0) {
 212             // the digit-run goes until the end
 213             if (!is_ansi) {
 214                 restyle_digits(w, line, style);
 215             } else {
 216                 fwrite(line.ptr, 1, line.len, w);
 217             }
 218             return;
 219         }
 220 
 221         // emit styled digit-run... maybe
 222         if (!is_ansi) {
 223             span chunk;
 224             chunk.ptr = line.ptr;
 225             chunk.len = j;
 226             restyle_digits(w, chunk, style);
 227         } else {
 228             fwrite(line.ptr, 1, j, w);
 229         }
 230 
 231         // skip right past the end of the digit-run
 232         advance(&line, j);
 233     }
 234 }
 235 
 // default_digits_style is the ANSI-code used to style digit groups when no
 // style option is given: a gray, either as a 256-color code when building
 // with COMPACT_OUTPUT, or as a 24-bit color code otherwise
 #ifdef COMPACT_OUTPUT
 #define DEFAULT_DIGITS_STYLE "\x1b[38;5;248m"
 #else
 #define DEFAULT_DIGITS_STYLE "\x1b[38;2;168;168;168m"
 #endif
 unsigned char default_digits_style[] = DEFAULT_DIGITS_STYLE;
 #undef DEFAULT_DIGITS_STYLE
 242 
 243 typedef struct handler_args {
 244     FILE* w;
 245     slice* line;
 246     span style;
 247 } handler_args;
 248 
 // starts_with_bom checks if the first len bytes at p begin with the 3-byte
 // UTF-8 byte-order mark (0xEF 0xBB 0xBF)
 bool starts_with_bom(const unsigned char* p, size_t len) {
     static const unsigned char bom[] = { 0xef, 0xbb, 0xbf };

     if (len < sizeof(bom)) {
         return false;
     }

     for (size_t k = 0; k < sizeof(bom); k++) {
         if (p[k] != bom[k]) {
             return false;
         }
     }
     return true;
 }
 252 
 253 // handle_lines loops over input lines, restyling all digit-runs as more
 254 // readable `nice numbers`, fulfilling the app's purpose
 255 void handle_lines(handler_args args, FILE* src, bool live_lines) {
 256     FILE* w = args.w;
 257     slice* line = args.line;
 258 
 259     for (size_t i = 0; !feof(w); i++) {
 260         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 261         if (line->ptr == NULL) {
 262             fprintf(stderr, "\n");
 263             fprintf(stderr, ERROR_LINE("out of memory"));
 264             exit(BAD_ALLOC);
 265         }
 266 
 267         if (len < 0) {
 268             break;
 269         }
 270 
 271         unsigned char* ptr = line->ptr;
 272 
 273         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 274         if (i == 0 && starts_with_bom(ptr, len)) {
 275             ptr += 3;
 276             len -= 3;
 277         }
 278 
 279         // replace trailing carriage-returns with line-feeds
 280         if (len >= 1 && ptr[len - 1] == '\r') {
 281             ptr[len - 1] = '\n';
 282         }
 283 
 284         // get rid of carriage-returns preceding line-feeds
 285         if (len >= 2 && ptr[len - 2] == '\r' && ptr[len - 1] == '\n') {
 286             ptr[len - 2] = '\n';
 287             len--;
 288         }
 289 
 290         restyle_line(w, ptr, len, args.style);
 291         if (len < 1 || ptr[len - 1] != '\n') {
 292             fputc('\n', w);
 293         }
 294         if (live_lines) {
 295             fflush(w);
 296         }
 297     }
 298 
 299     if (!live_lines) {
 300         fflush(w);
 301     }
 302 }
 303 
 304 // handle_file handles data from the filename given; returns false only when
 305 // the file can't be opened
 306 bool handle_file(handler_args args, const char* path, bool live_lines) {
 307     FILE* f = fopen(path, "rb");
 308     if (f == NULL) {
 309         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 310         return false;
 311     }
 312 
 313     handle_lines(args, f, live_lines);
 314     fclose(f);
 315     return true;
 316 }
 317 
 // style_names_aliases is a flat table of pairs: items at even indices are
 // the aliases, each followed by the style name it stands for; options are
 // looked up here after their leading dashes are removed
 const char* style_names_aliases[] = {
     // single-letter aliases
     "b",           "blue",
     "g",           "green",
     "h",           "inverse",
     "i",           "inverse",
     "m",           "magenta",
     "o",           "orange",
     "p",           "purple",
     "r",           "red",
     "u",           "underline",

     // 2-letter aliases
     "hi",          "inverse",
     "ma",          "magenta",
     "or",          "orange",
     "un",          "underline",

     // 3-letter aliases
     "inv",         "inverse",
     "mag",         "magenta",

     // alternative names/spellings
     "grey",        "gray",
     "highlight",   "inverse",
     "highlighted", "inverse",
     "hilite",      "inverse",
     "hilited",     "inverse",
     "invert",      "inverse",
     "inverted",    "inverse",
     "underlined",  "underline",

     // aliases for the colored-background styles
     "bb",          "blueback",
     "gb",          "greenback",
     "mb",          "magentaback",
     "ob",          "orangeback",
     "pb",          "purpleback",
     "rb",          "redback",

     "greyback",    "grayback",
 };
 355 
 // STYLE_CODE picks between the 2 ANSI-codes given when compiling: the first
 // one (a 256-color code) when building with COMPACT_OUTPUT, the second one
 // (a 24-bit color code) otherwise
 #ifdef COMPACT_OUTPUT
 #define STYLE_CODE(compact, full) compact
 #else
 #define STYLE_CODE(compact, full) full
 #endif

 // styles is a flat table of pairs: items at even indices are style names,
 // each followed by the ANSI-code which starts that style; the `back` styles
 // set a background color, followed by a light foreground color
 char* styles[] = {
     "blue", STYLE_CODE("\x1b[38;5;26m", "\x1b[38;2;0;95;215m"),
     "bold", "\x1b[1m",
     "gray", STYLE_CODE("\x1b[38;5;248m", "\x1b[38;2;168;168;168m"),
     "green", STYLE_CODE("\x1b[38;5;29m", "\x1b[38;2;0;135;95m"),
     "inverse", "\x1b[7m",
     "magenta", STYLE_CODE("\x1b[38;5;165m", "\x1b[38;2;215;0;255m"),
     "orange", STYLE_CODE("\x1b[38;5;166m", "\x1b[38;2;215;95;0m"),
     "purple", STYLE_CODE("\x1b[38;5;99m", "\x1b[38;2;135;95;255m"),
     "red", STYLE_CODE("\x1b[38;5;1m", "\x1b[38;2;204;0;0m"),
     "underline", "\x1b[4m",

     "blueback", STYLE_CODE(
         "\x1b[48;5;26m\x1b[38;5;15m",
         "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m"
     ),
     "grayback", STYLE_CODE(
         "\x1b[48;5;248m\x1b[38;5;15m",
         "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m"
     ),
     "greenback", STYLE_CODE(
         "\x1b[48;5;29m\x1b[38;5;15m",
         "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m"
     ),
     "magentaback", STYLE_CODE(
         "\x1b[48;5;165m\x1b[38;5;15m",
         "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m"
     ),
     "orangeback", STYLE_CODE(
         "\x1b[48;5;166m\x1b[38;5;15m",
         "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m"
     ),
     "purpleback", STYLE_CODE(
         "\x1b[48;5;99m\x1b[38;5;15m",
         "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m"
     ),
     "redback", STYLE_CODE(
         "\x1b[48;5;1m\x1b[38;5;15m",
         "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m"
     ),
 };

 #undef STYLE_CODE
 399 
 400 bool change_style(const char* arg, span* style) {
 401     // style-changing options must have 1 or 2 leading dashes
 402     if (arg[0] != '-') {
 403         return false;
 404     }
 405 
 406     // skip up to 2 leading dashes
 407     const char* s = arg + (arg[1] == '-' ? 2 : 1);
 408 
 409     // resolve style-name aliases
 410     const size_t n = sizeof(style_names_aliases) / sizeof(char*);
 411     for (size_t i = 0; i < n; i += 2) {
 412         if (strcmp(s, style_names_aliases[i]) == 0) {
 413             s = style_names_aliases[i + 1];
 414             break;
 415         }
 416     }
 417 
 418     // try to find ANSI-code for the style-name given
 419     for (size_t i = 0; i < sizeof(styles) / sizeof(char*); i += 2) {
 420         if (strcmp(s, styles[i]) == 0) {
 421             style->ptr = (unsigned char*)styles[i + 1];
 422             style->len = strlen(styles[i + 1]);
 423             return true;
 424         }
 425     }
 426 
 427     return false;
 428 }
 429 
 430 // run returns the number of errors
 431 int run(int argc, char** argv, FILE* w, bool live_lines) {
 432     size_t files = 0;
 433     size_t errors = 0;
 434 
 435     slice line;
 436     line.cap = 32 * 1024;
 437     line.ptr = malloc(line.cap);
 438 
 439     if (line.ptr == NULL) {
 440         fprintf(stderr, ERROR_LINE("out of memory"));
 441         exit(BAD_ALLOC);
 442     }
 443 
 444     handler_args args;
 445     args.w = w;
 446     args.line = &line;
 447     args.style.ptr = default_digits_style;
 448     args.style.len = strlen((char*)default_digits_style);
 449 
 450     for (size_t i = 1; i < (size_t)argc && !feof(w); i++) {
 451         const char* arg = argv[i];
 452 
 453         // `-` means standard input
 454         if (arg[0] == '-' && arg[1] == 0) {
 455             handle_lines(args, stdin, live_lines);
 456             files++;
 457             continue;
 458         }
 459 
 460         if (arg[0] == '-') {
 461             if (!change_style(arg, &args.style)) {
 462                 fprintf(stderr, ERROR_LINE("unsupported style named %s"), arg);
 463                 errors++;
 464             }
 465             continue;
 466         }
 467 
 468         if (!handle_file(args, arg, live_lines)) {
 469             errors++;
 470         }
 471         files++;
 472     }
 473 
 474     // use stdin when not given any filepaths
 475     if (files == 0) {
 476         handle_lines(args, stdin, live_lines);
 477     }
 478 
 479     free(line.ptr);
 480     return errors;
 481 }
 482 
 // is_help_option checks if the string given is exactly one of the help
 // options, which are `h` and `help`, after either 1 or 2 leading dashes
 bool is_help_option(const char* s) {
     static const char* const names[] = { "-h", "-help", "--h", "--help" };

     if (s[0] != '-') {
         return false;
     }

     for (size_t k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
         if (strcmp(s, names[k]) == 0) {
             return true;
         }
     }
     return false;
 }
 492 
 493 int main(int argc, char** argv) {
 494 #ifdef _WIN32
 495     setmode(fileno(stdin), O_BINARY);
 496     // ensure output lines end in LF instead of CRLF on windows
 497     setmode(fileno(stdout), O_BINARY);
 498     setmode(fileno(stderr), O_BINARY);
 499 #endif
 500 
 501     // handle any of the help options, if given
 502     if (argc > 1 && is_help_option(argv[1])) {
 503         printf("%s", info);
 504         return 0;
 505     }
 506 
 507     const bool live_lines = lseek(fileno(stdout), 0, SEEK_CUR) != 0;
 508     if (!live_lines) {
 509         setvbuf(stdout, NULL, _IOFBF, 0);
 510     }
 511     return run(argc, argv, stdout, live_lines) == 0 ? 0 : 1;
 512 }