nn.c

     File: nn.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./nn ./nn.c
  29 
  30 Building with COMPACT_OUTPUT defined makes `nn` output many fewer bytes, at
  31 the cost of using arguably worse colors. You can do that by running
  32 
   33 cc -Wall -s -O2 -D COMPACT_OUTPUT -o ./nn ./nn.c
  34 */
  35 
   #include <errno.h>
   36 #include <stdbool.h>
   37 #include <stddef.h>
   #include <stdint.h>
   38 #include <stdio.h>
   39 #include <stdlib.h>
   40 #include <string.h>
  41 
  42 #ifdef _WIN32
  43 #include <fcntl.h>
  44 #include <windows.h>
  45 #endif
  46 
  47 // #define COMPACT_OUTPUT
  48 
   // info is the help message func main shows via puts, which adds one more
   // line-feed after it, when the first command-line argument is a help option;
   // it's built out of adjacent string literals, one per line of output
   49 const char* info = ""
   50 "nn [options...] [filepaths...]\n"
   51 "\n"
   52 "\n"
   53 "Nice Numbers is an app which renders the plain text it's given to make long\n"
   54 "numbers much easier to read, by alternating 3-digit groups which are colored\n"
   55 "using ANSI-codes with unstyled ones.\n"
   56 "\n"
   57 "Unlike the common practice of inserting commas between 3-digit groups, this\n"
   58 "alternative doesn't widen the original text, keeping any alignments the same.\n"
   59 "\n"
   60 "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
   61 "from the standard input.\n"
   62 "\n"
   63 "\n"
   64 "Options, all of which can start with either 1 or 2 dashes:\n"
   65 "\n"
   66 "\n"
   67 "  -blue     use a blue-like color to alternate-style runs of digits\n"
   68 "  -bold     use a bold style/effect to alternate-style runs of digits\n"
   69 "  -gray     use a gray color to alternate-style runs of digits\n"
   70 "  -green    use a green color to alternate-style runs of digits\n"
   71 "  -inverse  invert/swap colors to alternate-style runs of digits\n"
   72 "  -orange   use an orange color to alternate-style runs of digits\n"
   73 "  -purple   use a purple color to alternate-style runs of digits\n"
   74 "  -red      use a red color to alternate-style runs of digits\n"
   75 "\n"
   76 "  -h          show this help message\n"
   77 "  -help       show this help message\n"
   78 "\n"
   79 "  -highlight  same as option -inverse\n"
   80 "  -hilite     same as option -inverse\n"
   81 "";
  82 
   // no_line_memory_msg is the error message shown on stderr, wrapped in ANSI
   // codes, when memory for the line-buffer can't be obtained
   83 const char* no_line_memory_msg = "can't get enough memory to read lines";
  84 
  85 // span is a region of bytes in memory
  86 typedef struct span {
  87     // ptr is the starting place of the region
  88     unsigned char* ptr;
  89 
  90     // len is how many bytes are in the region
  91     size_t len;
  92 } span;
  93 
  94 // advance updates a span so it starts after the number of bytes given
  95 void advance(span* src, size_t n) {
  96     src->ptr += n;
  97     src->len -= n;
  98 }
  99 
 100 // slice is a growable region of bytes in memory
 101 typedef struct slice {
 102     // ptr is the starting place of the region
 103     unsigned char* ptr;
 104 
 105     // len is how many bytes are currently being used
 106     size_t len;
 107 
 108     // cap is how many bytes the memory region has available
 109     size_t cap;
 110 } slice;
 111 
 112 void write_bytes(FILE* w, const unsigned char* src, size_t len) {
 113     fwrite(src, len, 1, w);
 114 }
 115 
 116 // find_digit returns the index of the first digit found, or a negative value
 117 // on failure
 118 int64_t find_digit(span s) {
 119     for (size_t i = 0; i < s.len; i++) {
 120         const unsigned char b = s.ptr[i];
 121         if ('0' <= b && b <= '9') {
 122             return i;
 123         }
 124     }
 125     return -1;
 126 }
 127 
 128 // find_non_digit returns the index of the first non-digit found, or a negative
 129 // value on failure
 130 int64_t find_non_digit(span s) {
 131     for (size_t i = 0; i < s.len; i++) {
 132         const unsigned char b = s.ptr[i];
 133         if (b < '0' || b > '9') {
 134             return i;
 135         }
 136     }
 137     return -1;
 138 }
 139 
 140 const unsigned char reset_style[] = "\x1b[0m";
 141 
 142 // restyle_digits renders a run of digits as alternating styled/unstyled runs
 143 // of 3 digits, which greatly improves readability, and is the only purpose
 144 // of this app; string is assumed to be all decimal digits
 145 void restyle_digits(FILE* w, span digits, span style) {
 146     if (digits.len < 4) {
 147         // digit sequence is short, so emit it as is
 148         write_bytes(w, digits.ptr, digits.len);
 149         return;
 150     }
 151 
 152     // separate leading 0..2 digits which don't align with the 3-digit groups
 153     size_t lead = digits.len % 3;
 154     // emit leading digits unstyled, if there are any
 155     write_bytes(w, digits.ptr, lead);
 156     // the rest is guaranteed to have a length which is a multiple of 3
 157     advance(&digits, lead);
 158 
 159     // start with the alternate style, unless there were no leading digits
 160     bool style_now = lead != 0;
 161 
 162     while (digits.len > 0) {
 163         if (style_now) {
 164             write_bytes(w, style.ptr, style.len);
 165             write_bytes(w, digits.ptr, 3);
 166             write_bytes(w, reset_style, sizeof(reset_style) - 1);
 167         } else {
 168             write_bytes(w, digits.ptr, 3);
 169         }
 170 
 171         advance(&digits, 3);
 172         // alternate between styled and unstyled 3-digit groups
 173         style_now = !style_now;
 174     }
 175 }
 176 
 177 // restyle_line renders the line given, using ANSI-styles to make any long
 178 // numbers in it more legible
 179 void restyle_line(FILE* w, span line, span alt_style) {
 180     while (!feof(w) && line.len > 0) {
 181         int64_t i = find_digit(line);
 182         if (i < 0) {
 183             // no (more) digits for sure
 184             write_bytes(w, line.ptr, line.len);
 185             return;
 186         }
 187 
 188         // some ANSI-style sequences use 4-digit numbers, which are long
 189         // enough for this app to mangle
 190         const unsigned char* p = line.ptr;
 191         bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';
 192 
 193         // emit line before current digit-run
 194         write_bytes(w, line.ptr, i);
 195 
 196         advance(&line, i);
 197 
 198         // see where the digit-run ends
 199         int64_t j = find_non_digit(line);
 200         if (j < 0) {
 201             // the digit-run goes until the end
 202             if (!is_ansi) {
 203                 restyle_digits(w, line, alt_style);
 204             } else {
 205                 write_bytes(w, line.ptr, line.len);
 206             }
 207             return;
 208         }
 209 
 210         // emit styled digit-run... maybe
 211         if (!is_ansi) {
 212             span s;
 213             s.ptr = line.ptr;
 214             s.len = j;
 215             restyle_digits(w, s, alt_style);
 216         } else {
 217             write_bytes(w, line.ptr, j);
 218         }
 219 
 220         // skip right past the end of the digit-run
 221         advance(&line, j);
 222     }
 223 }
 224 
  225 // default_digits_style makes it easy to change the built-in default style
  // func run starts with, before any style options are handled; both variants
  // are the same sequences the styles table gives for the name `gray`, the
  // one picked when COMPACT_OUTPUT is defined being the shorter of the two
  226 #ifdef COMPACT_OUTPUT
  227 unsigned char default_digits_style[] = "\x1b[38;5;248m";
  228 #else
  229 unsigned char default_digits_style[] = "\x1b[38;2;168;168;168m";
  230 #endif
 231 
  // handler_args bundles the values funcs handle_lines and handle_file need
  // besides their input source
  232 typedef struct handler_args {
          // w is the stream all restyled lines are written to
  233     FILE* w;
          // line is the buffer func getline reads/grows, reused across inputs
  234     slice* line;
          // style is the ANSI sequence used for alternate 3-digit groups
  235     span style;
  236 } handler_args;
 237 
 238 bool bom_start(span s) {
 239     const unsigned char* p = s.ptr;
 240     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 241 }
 242 
 243 // handle_lines loops over input lines, restyling all digit-runs as more
 244 // readable `nice numbers`, fulfilling the app's purpose
 245 bool handle_lines(handler_args args, FILE* src) {
 246     FILE* w = args.w;
 247     slice* line = args.line;
 248     span trimmed;
 249 
 250     for (size_t i = 0; !feof(w); i++) {
 251         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 252         if (len < 0) {
 253             break;
 254         }
 255 
 256         if (line->ptr == NULL) {
 257             fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 258             return false;
 259         }
 260 
 261         line->len = len;
 262         trimmed.ptr = line->ptr;
 263         trimmed.len = line->len;
 264 
 265         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 266         if (i == 0 && bom_start(trimmed)) {
 267             trimmed.ptr += 3;
 268             trimmed.len -= 3;
 269             len = trimmed.len;
 270         }
 271 
 272         const unsigned char* p = trimmed.ptr;
 273         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 274         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 275             trimmed.len -= 2;
 276         } else if (len >= 1 && p[len - 1] == '\n') {
 277             trimmed.len--;
 278         }
 279 
 280         restyle_line(w, trimmed, args.style);
 281         putc('\n', w);
 282         fflush(w);
 283     }
 284 
 285     return true;
 286 }
 287 
 288 // handle_file handles data from the filename given; returns false only when
 289 // the file can't be opened
 290 bool handle_file(handler_args args, const char* path) {
 291     FILE* f = fopen(path, "rb");
 292     if (f == NULL) {
 293         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 294         return false;
 295     }
 296 
 297     const bool ok = handle_lines(args, f);
 298     fclose(f);
 299     return ok;
 300 }
 301 
  // style_names_aliases is a flat list of string pairs: each item at an even
  // index is an alias, and the item right after it is the style name it stands
  // for; func change_style scans it 2 items at a time, comparing option names
  // (without their leading dashes) against the aliases, so items must stay
  // paired; since func main only checks the first argument for help options,
  // a later `-h` argument is resolved via this table as the `inverse` style
  302 const char *style_names_aliases[] = {
  303     "b", "blue",
  304     "g", "green",
  305     "h", "inverse",
  306     "i", "inverse",
  307     "m", "magenta",
  308     "o", "orange",
  309     "p", "purple",
  310     "r", "red",
  311     "u", "underline",
  312 
  313     "hi", "inverse",
  314     "ma", "magenta",
  315     "or", "orange",
  316     "un", "underline",
  317 
  318     "inv", "inverse",
  319     "mag", "magenta",
  320 
  321     "grey", "gray",
  322     "highlight", "inverse",
  323     "highlighted", "inverse",
  324     "hilite", "inverse",
  325     "hilited", "inverse",
  326     "invert", "inverse",
  327     "inverted", "inverse",
  328     "underlined", "underline",
  329 
  330     "bb", "blueback",
  331     "gb", "greenback",
  332     "mb", "magentaback",
  333     "ob", "orangeback",
  334     "pb", "purpleback",
  335     "rb", "redback",
  336 
  337     "greyback", "grayback",
  338 };
 339 
  // styles is a flat list of string pairs: each item at an even index is a
  // style name, and the item right after it is the ANSI sequence emitted before
  // each styled group of digits; func change_style scans it 2 items at a time,
  // after resolving aliases, so items must stay paired; both variants have the
  // same names in the same order, the ones for COMPACT_OUTPUT using shorter
  // sequences; the names ending in `back` are made of 2 sequences each
  340 #ifdef COMPACT_OUTPUT
  341 char *styles[] = {
  342     "blue", "\x1b[38;5;26m",
  343     "bold", "\x1b[1m",
  344     "gray", "\x1b[38;5;248m",
  345     "green", "\x1b[38;5;29m",
  346     "inverse", "\x1b[7m",
  347     "magenta", "\x1b[38;5;165m",
  348     "orange", "\x1b[38;5;166m",
  349     "purple", "\x1b[38;5;99m",
  350     "red", "\x1b[38;5;1m",
  351     "underline", "\x1b[4m",
  352 
  353     "blueback", "\x1b[48;5;26m\x1b[38;5;15m",
  354     "grayback", "\x1b[48;5;248m\x1b[38;5;15m",
  355     "greenback", "\x1b[48;5;29m\x1b[38;5;15m",
  356     "magentaback", "\x1b[48;5;165m\x1b[38;5;15m",
  357     "orangeback", "\x1b[48;5;166m\x1b[38;5;15m",
  358     "purpleback", "\x1b[48;5;99m\x1b[38;5;15m",
  359     "redback", "\x1b[48;5;1m\x1b[38;5;15m",
  360 };
  361 #else
  362 char *styles[] = {
  363     "blue", "\x1b[38;2;0;95;215m",
  364     "bold", "\x1b[1m",
  365     "gray", "\x1b[38;2;168;168;168m",
  366     "green", "\x1b[38;2;0;135;95m",
  367     "inverse", "\x1b[7m",
  368     "magenta", "\x1b[38;2;215;0;255m",
  369     "orange", "\x1b[38;2;215;95;0m",
  370     "purple", "\x1b[38;2;135;95;255m",
  371     "red", "\x1b[38;2;204;0;0m",
  372     "underline", "\x1b[4m",
  373 
  374     "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
  375     "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
  376     "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
  377     "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
  378     "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
  379     "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
  380     "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
  381 };
  382 #endif
 383 
 384 bool change_style(const char* arg, span* style) {
 385     // style-changing options must have 1 or 2 leading dashes
 386     if (arg[0] != '-') {
 387         return false;
 388     }
 389 
 390     // skip up to 2 leading dashes
 391     const char* s = arg + (arg[1] == '-' ? 2 : 1);
 392 
 393     // resolve style-name aliases
 394     const size_t n = sizeof(style_names_aliases) / sizeof(char*);
 395     for (size_t i = 0; i < n; i += 2) {
 396         if (strcmp(s, style_names_aliases[i]) == 0) {
 397             s = style_names_aliases[i + 1];
 398             break;
 399         }
 400     }
 401 
 402     // try to find ANSI-code for the style-name given
 403     for (size_t i = 0; i < sizeof(styles) / sizeof(char *); i += 2) {
 404         if (strcmp(s, styles[i]) == 0) {
 405             style->ptr = (unsigned char*)styles[i + 1];
 406             style->len = strlen(styles[i + 1]);
 407             return true;
 408         }
 409     }
 410 
 411     return false;
 412 }
 413 
 414 // run returns the number of errors
 415 int run(int argc, char** argv, FILE* w) {
 416     size_t files = 0;
 417     size_t errors = 0;
 418 
 419     slice line;
 420     line.len = 0;
 421     line.cap = 32 * 1024;
 422     line.ptr = malloc(line.cap);
 423 
 424     if (line.ptr == NULL) {
 425         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 426         return 1;
 427     }
 428 
 429     handler_args args;
 430     args.w = w;
 431     args.line = &line;
 432     args.style.ptr = default_digits_style;
 433     args.style.len = strlen((char*)default_digits_style);
 434 
 435     for (size_t i = 1; i < (size_t)argc && !feof(w) && line.ptr != NULL; i++) {
 436         const char* arg = argv[i];
 437 
 438         // `-` means standard input
 439         if (arg[0] == '-' && arg[1] == 0) {
 440             if (!handle_lines(args, stdin)) {
 441                 errors++;
 442             }
 443             files++;
 444             continue;
 445         }
 446 
 447         if (arg[0] == '-') {
 448             if (!change_style(arg, &args.style)) {
 449                 const char* f = "\x1b[31munsupported style named %s\x1b[0m\n";
 450                 fprintf(stderr, f, arg);
 451                 errors++;
 452             }
 453             continue;
 454         }
 455 
 456         if (!handle_file(args, arg)) {
 457             errors++;
 458         }
 459         files++;
 460     }
 461 
 462     // use stdin when not given any filepaths
 463     if (files == 0) {
 464         if (!handle_lines(args, stdin)) {
 465             errors++;
 466         }
 467     }
 468 
 469     free(line.ptr);
 470     return errors;
 471 }
 472 
 473 // is_help_option simplifies control-flow for func main
 474 bool is_help_option(const char* s) {
 475     return (s[0] == '-') && (
 476         strcmp(s, "-h") == 0 ||
 477         strcmp(s, "-help") == 0 ||
 478         strcmp(s, "--h") == 0 ||
 479         strcmp(s, "--help") == 0
 480     );
 481 }
 482 
 483 int main(int argc, char** argv) {
 484 #ifdef _WIN32
 485     setmode(fileno(stdin), O_BINARY);
 486     // ensure output lines end in LF instead of CRLF on windows
 487     setmode(fileno(stdout), O_BINARY);
 488     setmode(fileno(stderr), O_BINARY);
 489 #endif
 490 
 491     // handle any of the help options, if given
 492     if (argc > 1 && is_help_option(argv[1])) {
 493         puts(info);
 494         return 0;
 495     }
 496 
 497     return run(argc, argv, stdout) == 0 ? 0 : 1;
 498 }