File: nn.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./nn ./nn.c
  29 
  30 Building with COMPACT_OUTPUT defined makes `nn` output many fewer bytes, at
  31 the cost of using arguably worse colors. You can do that by running
  32 
  33 cc -Wall -s -O2 -D COMPACT_OUTPUT -o ./nn ./nn.c
  34 */
  35 
  36 #include <fcntl.h>
  37 #include <stdbool.h>
  38 #include <stddef.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 
  43 #ifdef _WIN32
  44 #include <windows.h>
  45 #endif
  46 
  47 // #define COMPACT_OUTPUT
  48 
  49 // info is the message shown when this app is given any of its help options
  50 const char* info = ""
  51 "nn [options...] [filepaths...]\n"
  52 "\n"
  53 "\n"
  54 "Nice Numbers is an app which renders the plain text it's given to make long\n"
  55 "numbers much easier to read, by alternating 3-digit groups which are colored\n"
  56 "using ANSI-codes with unstyled ones.\n"
  57 "\n"
  58 "Unlike the common practice of inserting commas between 3-digit groups, this\n"
  59 "alternative doesn't widen the original text, keeping any alignments the same.\n"
  60 "\n"
  61 "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
  62 "from the standard input.\n"
  63 "\n"
  64 "\n"
  65 "Options, all of which can start with either 1 or 2 dashes:\n"
  66 "\n"
  67 "\n"
  68 "  -blue     use a blue-like color to alternate-style runs of digits\n"
  69 "  -bold     use a bold style/effect to alternate-style runs of digits\n"
  70 "  -gray     use a gray color to alternate-style runs of digits\n"
  71 "  -green    use a green color to alternate-style runs of digits\n"
  72 "  -inverse  invert/swap colors to alternate-style runs of digits\n"
  73 "  -orange   use an orange color to alternate-style runs of digits\n"
  74 "  -purple   use a purple color to alternate-style runs of digits\n"
  75 "  -red      use a red color to alternate-style runs of digits\n"
  76 "\n"
  77 "  -h          show this help message\n"
  78 "  -help       show this help message\n"
  79 "\n"
  80 "  -highlight  same as option -inverse\n"
  81 "  -hilite     same as option -inverse\n"
  82 "";
  83 
  84 const char* line_memory_error_msg =
  85     ""
  86     "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";
  87 
  88 // slice is a growable region of bytes in memory
  89 typedef struct slice {
  90     // ptr is the starting place of the region
  91     unsigned char* ptr;
  92 
  93     // len is how many bytes are currently being used
  94     size_t len;
  95 
  96     // cap is how many bytes the memory region has available
  97     size_t cap;
  98 } slice;
  99 
 100 // init_slice is the constructor for type slice
 101 void init_slice(slice* s, size_t cap) {
 102     s->ptr = malloc(cap);
 103     s->len = 0;
 104     s->cap = cap;
 105 }
 106 
 107 // advance updates a slice so it starts after the number of bytes given
 108 inline void advance(slice* src, size_t n) {
 109     src->ptr += n;
 110     src->len -= n;
 111 }
 112 
 113 inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
 114     fwrite(src, len, 1, w);
 115 }
 116 
 117 // find_digit returns the index of the first digit found, or a negative value
 118 // on failure
 119 long long int find_digit(slice s) {
 120     for (size_t i = 0; i < s.len; i++) {
 121         const unsigned char b = s.ptr[i];
 122         if ('0' <= b && b <= '9') {
 123             return i;
 124         }
 125     }
 126     return -1;
 127 }
 128 
 129 // find_non_digit returns the index of the first non-digit found, or a negative
 130 // value on failure
 131 long long int find_non_digit(slice s) {
 132     for (size_t i = 0; i < s.len; i++) {
 133         const unsigned char b = s.ptr[i];
 134         if (b < '0' || b > '9') {
 135             return i;
 136         }
 137     }
 138     return -1;
 139 }
 140 
 141 const unsigned char reset_style[] = "\x1b[0m";
 142 
 143 // restyle_digits renders a run of digits as alternating styled/unstyled runs
 144 // of 3 digits, which greatly improves readability, and is the only purpose
 145 // of this app; string is assumed to be all decimal digits
 146 void restyle_digits(FILE* w, slice digits, slice style) {
 147     if (digits.len < 4) {
 148         // digit sequence is short, so emit it as is
 149         write_bytes(w, digits.ptr, digits.len);
 150         return;
 151     }
 152 
 153     // separate leading 0..2 digits which don't align with the 3-digit groups
 154     size_t lead = digits.len % 3;
 155     // emit leading digits unstyled, if there are any
 156     write_bytes(w, digits.ptr, lead);
 157     // the rest is guaranteed to have a length which is a multiple of 3
 158     advance(&digits, lead);
 159 
 160     // start with the alternate style, unless there were no leading digits
 161     bool style_now = lead != 0;
 162 
 163     while (digits.len > 0) {
 164         if (style_now) {
 165             write_bytes(w, style.ptr, style.len);
 166             write_bytes(w, digits.ptr, 3);
 167             write_bytes(w, reset_style, sizeof(reset_style) - 1);
 168         } else {
 169             write_bytes(w, digits.ptr, 3);
 170         }
 171 
 172         advance(&digits, 3);
 173         // alternate between styled and unstyled 3-digit groups
 174         style_now = !style_now;
 175     }
 176 }
 177 
 178 // restyle_line renders the line given, using ANSI-styles to make any long
 179 // numbers in it more legible
 180 void restyle_line(FILE* w, slice line, slice alt_style) {
 181     while (!feof(w) && line.len > 0) {
 182         long int i = find_digit(line);
 183         if (i < 0) {
 184             // no (more) digits for sure
 185             write_bytes(w, line.ptr, line.len);
 186             return;
 187         }
 188 
 189         // some ANSI-style sequences use 4-digit numbers, which are long
 190         // enough for this app to mangle
 191         const unsigned char* p = line.ptr;
 192         bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';
 193 
 194         // emit line before current digit-run
 195         write_bytes(w, line.ptr, i);
 196 
 197         advance(&line, i);
 198 
 199         // see where the digit-run ends
 200         long int j = find_non_digit(line);
 201         if (j < 0) {
 202             // the digit-run goes until the end
 203             if (!is_ansi) {
 204                 restyle_digits(w, line, alt_style);
 205             } else {
 206                 write_bytes(w, line.ptr, line.len);
 207             }
 208             return;
 209         }
 210 
 211         // emit styled digit-run... maybe
 212         if (!is_ansi) {
 213             slice s;
 214             s.ptr = line.ptr;
 215             s.len = j;
 216             s.cap = j;
 217             restyle_digits(w, s, alt_style);
 218         } else {
 219             write_bytes(w, line.ptr, j);
 220         }
 221 
 222         // skip right past the end of the digit-run
 223         advance(&line, j);
 224     }
 225 }
 226 
 227 // default_digits_style makes it easy to change the built-in default style
 228 unsigned char default_digits_style[] = "\x1b[38;5;248m";
 229 
 230 typedef struct handler_args {
 231     FILE* w;
 232     slice* line;
 233     slice style;
 234 } handler_args;
 235 
 236 bool bom_start(slice s) {
 237     const unsigned char* p = s.ptr;
 238     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 239 }
 240 
 241 // handle_lines loops over input lines, restyling all digit-runs as more
 242 // readable `nice numbers`, fulfilling the app's purpose
 243 bool handle_lines(handler_args args, FILE* src) {
 244     FILE* w = args.w;
 245     slice* line = args.line;
 246     slice trimmed;
 247     trimmed.cap = 0;
 248 
 249     for (size_t i = 0; !feof(w); i++) {
 250         int len = getline((char**)&line->ptr, &line->cap, src);
 251         if (len < 0) {
 252             break;
 253         }
 254         if (line->ptr == NULL) {
 255             fprintf(stderr, line_memory_error_msg);
 256             exit(1);
 257         }
 258 
 259         line->len = len;
 260         trimmed.ptr = line->ptr;
 261         trimmed.len = line->len;
 262 
 263         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 264         if (i == 0 && bom_start(trimmed)) {
 265             trimmed.ptr += 3;
 266             trimmed.len -= 3;
 267             len = trimmed.len;
 268         }
 269 
 270         const unsigned char* p = trimmed.ptr;
 271         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 272         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 273             trimmed.len -= 2;
 274         } else if (len >= 1 && p[len - 1] == '\n') {
 275             trimmed.len--;
 276         }
 277 
 278         restyle_line(w, trimmed, args.style);
 279         putc('\n', w);
 280     }
 281 
 282     return true;
 283 }
 284 
 285 // handle_file handles data from the filename given; returns false only when
 286 // the file can't be opened
 287 bool handle_file(handler_args args, const char* path) {
 288     FILE* f = fopen(path, "rb");
 289     if (f == NULL) {
 290         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 291         return false;
 292     }
 293 
 294     const bool ok = handle_lines(args, f);
 295     fclose(f);
 296     return ok;
 297 }
 298 
 299 const char *style_names_aliases[] = {
 300     "b", "blue",
 301     "g", "green",
 302     "h", "inverse",
 303     "i", "inverse",
 304     "m", "magenta",
 305     "o", "orange",
 306     "p", "purple",
 307     "r", "red",
 308     "u", "underline",
 309 
 310     "hi", "inverse",
 311     "ma", "magenta",
 312     "or", "orange",
 313     "un", "underline",
 314 
 315     "inv", "inverse",
 316     "mag", "magenta",
 317 
 318     "grey", "gray",
 319     "highlight", "inverse",
 320     "highlighted", "inverse",
 321     "hilite", "inverse",
 322     "hilited", "inverse",
 323     "invert", "inverse",
 324     "inverted", "inverse",
 325     "underlined", "underline",
 326 
 327     "bb", "blueback",
 328     "gb", "greenback",
 329     "mb", "magentaback",
 330     "ob", "orangeback",
 331     "pb", "purpleback",
 332     "rb", "redback",
 333 
 334     "greyback", "grayback",
 335 };
 336 
 337 #ifdef COMPACT_OUTPUT
 338 char *styles[] = {
 339     "blue", "\x1b[38;5;26m",
 340     "bold", "\x1b[1m",
 341     "gray", "\x1b[38;5;248m",
 342     "green", "\x1b[38;5;29m",
 343     "inverse", "\x1b[7m",
 344     "magenta", "\x1b[38;5;165m",
 345     "orange", "\x1b[38;5;166m",
 346     "purple", "\x1b[38;5;99m",
 347     "red", "\x1b[38;5;1m",
 348     "underline", "\x1b[4m",
 349 
 350     "blueback", "\x1b[48;5;26m\x1b[38;5;15m",
 351     "grayback", "\x1b[48;5;248m\x1b[38;5;15m",
 352     "greenback", "\x1b[48;5;29m\x1b[38;5;15m",
 353     "magentaback", "\x1b[48;5;165m\x1b[38;5;15m",
 354     "orangeback", "\x1b[48;5;166m\x1b[38;5;15m",
 355     "purpleback", "\x1b[48;5;99m\x1b[38;5;15m",
 356     "redback", "\x1b[48;5;1m\x1b[38;5;15m",
 357 };
 358 #else
 359 char *styles[] = {
 360     "blue", "\x1b[38;2;0;95;215m",
 361     "bold", "\x1b[1m",
 362     "gray", "\x1b[38;2;168;168;168m",
 363     "green", "\x1b[38;2;0;135;95m",
 364     "inverse", "\x1b[7m",
 365     "magenta", "\x1b[38;2;215;0;255m",
 366     "orange", "\x1b[38;2;215;95;0m",
 367     "purple", "\x1b[38;2;135;95;255m",
 368     "red", "\x1b[38;2;204;0;0m",
 369     "underline", "\x1b[4m",
 370 
 371     "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
 372     "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
 373     "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
 374     "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
 375     "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
 376     "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
 377     "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
 378 };
 379 #endif
 380 
 381 bool change_style(const char* arg, slice* style) {
 382     // style-changing options must have 1 or 2 leading dashes
 383     if (arg[0] != '-') {
 384         return false;
 385     }
 386 
 387     // skip up to 2 leading dashes
 388     const char* s = arg + (arg[1] == '-' ? 2 : 1);
 389 
 390     // resolve style-name aliases
 391     const size_t n = sizeof(style_names_aliases) / sizeof(char*);
 392     for (size_t i = 0; i < n; i += 2) {
 393         if (strcmp(s, style_names_aliases[i]) == 0) {
 394             s = style_names_aliases[i + 1];
 395             break;
 396         }
 397     }
 398 
 399     // try to find ANSI-code for the style-name given
 400     for (size_t i = 0; i < sizeof(styles) / sizeof(char *); i += 2) {
 401         if (strcmp(s, styles[i]) == 0) {
 402             style->ptr = (unsigned char*)styles[i + 1];
 403             style->len = strlen(styles[i + 1]);
 404             return true;
 405         }
 406     }
 407 
 408     return false;
 409 }
 410 
 411 // run returns the number of errors
 412 int run(int argc, char** argv, FILE* w, slice* line) {
 413     size_t files = 0;
 414     size_t errors = 0;
 415 
 416     handler_args args;
 417     args.w = w;
 418     args.line = line;
 419     args.style.ptr = default_digits_style;
 420     args.style.len = strlen((char*)default_digits_style);
 421 
 422     for (size_t i = 1; i < (size_t)argc && !feof(w); i++) {
 423         const char* arg = argv[i];
 424 
 425         // `-` means standard input
 426         if (arg[0] == '-' && arg[1] == 0) {
 427             if (!handle_lines(args, stdin)) {
 428                 errors++;
 429             }
 430             files++;
 431             continue;
 432         }
 433 
 434         if (arg[0] == '-') {
 435             if (!change_style(arg, &args.style)) {
 436                 char* fmt = "\x1b[31munsupported style named %s\x1b[0m\n";
 437                 fprintf(stderr, fmt, arg);
 438                 errors++;
 439             }
 440             continue;
 441         }
 442 
 443         if (!handle_file(args, arg)) {
 444             errors++;
 445         }
 446         files++;
 447     }
 448 
 449     // use stdin when not given any filepaths
 450     if (files == 0) {
 451         if (!handle_lines(args, stdin)) {
 452             errors++;
 453         }
 454     }
 455 
 456     return errors;
 457 }
 458 
 459 // is_help_option simplifies control-flow for func main
 460 bool is_help_option(char* s) {
 461     return (s[0] == '-') && (
 462         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 463         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 464     );
 465 }
 466 
 467 int main(int argc, char** argv) {
 468 #ifdef _WIN32
 469     setmode(fileno(stdin), O_BINARY);
 470     // ensure output lines end in LF instead of CRLF on windows
 471     setmode(fileno(stdout), O_BINARY);
 472     setmode(fileno(stderr), O_BINARY);
 473 #endif
 474 
 475     // handle any of the help options, if given
 476     if (argc > 1 && is_help_option(argv[1])) {
 477         puts(info);
 478         return 0;
 479     }
 480 
 481     slice line;
 482     init_slice(&line, 32 * 1024);
 483     if (line.ptr == NULL) {
 484         fprintf(stderr, line_memory_error_msg);
 485         return 1;
 486     }
 487 
 488     const int res = run(argc, argv, stdout, &line) == 0 ? 0 : 1;
 489     free(line.ptr);
 490     return res;
 491 }