File: nn.c 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 You can build this command-line app by running 27 28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./nn ./nn.c 29 30 Building with COMPACT_OUTPUT defined makes `nn` output many fewer bytes, at 31 the cost of using arguably worse colors. You can do that by running 32 33 cc -s -O3 -march=native -mtune=native -flto -D COMPACT_OUTPUT -o ./nh ./nh.c 34 */ 35 36 #include <stdbool.h> 37 #include <stddef.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 43 #ifdef _WIN32 44 #include <fcntl.h> 45 #include <windows.h> 46 #endif 47 48 #ifdef RED_ERRORS 49 #define ERROR_STYLE "\x1b[38;2;204;0;0m" 50 #ifdef __APPLE__ 51 #define ERROR_STYLE "\x1b[31m" 52 #endif 53 #define RESET_STYLE "\x1b[0m" 54 #else 55 #define ERROR_STYLE 56 #define RESET_STYLE 57 #endif 58 59 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n") 60 61 #define BAD_ALLOC 2 62 63 // #define COMPACT_OUTPUT 64 65 // EMIT_CONST emits string constants without their final null byte 66 #define EMIT_CONST(w, x) fwrite(x, 1, sizeof(x) - 1, w) 67 68 const char* info = "" 69 "nn [options...] [filepaths...]\n" 70 "\n" 71 "\n" 72 "Nice Numbers is an app which renders the plain text it's given to make long\n" 73 "numbers much easier to read, by alternating 3-digit groups which are colored\n" 74 "using ANSI-codes with unstyled ones.\n" 75 "\n" 76 "Unlike the common practice of inserting commas between 3-digit groups, this\n" 77 "alternative doesn't widen the original text, keeping any alignments the same.\n" 78 "\n" 79 "All input is assumed to be UTF-8. When not given any filepaths, input is read\n" 80 "from the standard input.\n" 81 "\n" 82 "\n" 83 "Options, all of which can start with either 1 or 2 dashes:\n" 84 "\n" 85 "\n" 86 " -blue use a blue-like color to alternate-style runs of digits\n" 87 " -bold use a bold style/effect to alternate-style runs of digits\n" 88 " -gray use a gray color to alternate-style runs of digits\n" 89 " -green use a green color to alternate-style runs of digits\n" 90 " -inverse invert/swap colors to alternate-style runs of digits\n" 91 " -orange use an orange color to alternate-style runs of digits\n" 92 " -purple use a purple color to alternate-style runs of digits\n" 93 " -red use a red color to alternate-style runs of digits\n" 94 "\n" 95 " -h show this help message\n" 96 " -help show this help message\n" 97 "\n" 98 " -highlight same as option -inverse\n" 99 " -hilite same as option -inverse\n" 100 ""; 101 102 // span is a region of bytes in memory 103 typedef struct span { 104 // ptr is the starting place of the region 105 unsigned char* ptr; 106 107 // len is how many bytes are in the region 108 size_t len; 109 } span; 110 111 // advance updates a span so it starts after the number of bytes given 112 static inline void advance(span* src, size_t n) { 113 src->ptr += n; 114 src->len -= n; 115 } 116 117 // slice is a growable region of bytes in memory 118 typedef struct slice { 119 // ptr is the starting place of the region 120 unsigned char* ptr; 121 122 // cap is how many bytes the memory region has available 123 size_t cap; 124 } slice; 125 126 // find_digit returns the index of the first digit found, or a negative value 127 // on failure 128 static inline int64_t find_digit(span s) { 129 for (size_t i = 0; i < s.len; i++) { 130 const unsigned char b = s.ptr[i]; 131 if ('0' <= b && b <= '9') { 132 return i; 133 } 134 } 135 return -1; 136 } 137 138 // find_non_digit returns the index of the first non-digit found, or a negative 139 // value on failure 140 static inline int64_t find_non_digit(span s) { 141 for (size_t i = 0; i < s.len; i++) { 142 const unsigned char b = s.ptr[i]; 143 if (b < '0' || b > '9') { 144 return i; 145 } 146 } 147 return -1; 148 } 149 150 // restyle_digits renders a run of digits as alternating styled/unstyled runs 151 // of 3 digits, which greatly improves readability, and is the only purpose 152 // of this app; string is assumed to be all decimal digits 153 void restyle_digits(FILE* w, span digits, span style) { 154 if (digits.len < 4) { 155 // digit sequence is short, so emit it as is 156 fwrite(digits.ptr, 1, digits.len, w); 157 return; 158 } 159 160 // separate leading 0..2 digits which don't align with the 3-digit groups 161 size_t lead = digits.len % 3; 162 // emit leading digits unstyled, if there are any 163 fwrite(digits.ptr, 1, lead, w); 164 // the rest is guaranteed to have a length which is a multiple of 3 165 advance(&digits, lead); 166 167 // start with the alternate style, unless there were no leading digits 168 bool style_now = lead != 0; 169 170 while (digits.len > 0) { 171 if (style_now) { 172 fwrite(style.ptr, 1, style.len, w); 173 fwrite(digits.ptr, 1, 3, w); 174 EMIT_CONST(w, "\x1b[0m"); 175 } else { 176 fwrite(digits.ptr, 1, 3, w); 177 } 178 179 advance(&digits, 3); 180 // alternate between styled and unstyled 3-digit groups 181 style_now = !style_now; 182 } 183 } 184 185 // restyle_line renders the line given, using ANSI-styles to make any long 186 // numbers in it more legible 187 void restyle_line(FILE* w, unsigned char* s, size_t len, span style) { 188 span line; 189 line.ptr = s; 190 line.len = len; 191 192 while (!feof(w) && line.len > 0) { 193 int64_t i = find_digit(line); 194 if (i < 0) { 195 // no (more) digits for sure 196 fwrite(line.ptr, 1, line.len, w); 197 return; 198 } 199 200 // some ANSI-style sequences use 4-digit numbers, which are long 201 // enough for this app to mangle 202 bool is_ansi = i >= 2 && s[i - 2] == '\x1b' && s[i - 1] == '['; 203 204 // emit line before current digit-run 205 fwrite(line.ptr, 1, i, w); 206 207 advance(&line, i); 208 209 // see where the digit-run ends 210 int64_t j = find_non_digit(line); 211 if (j < 0) { 212 // the digit-run goes until the end 213 if (!is_ansi) { 214 restyle_digits(w, line, style); 215 } else { 216 fwrite(line.ptr, 1, line.len, w); 217 } 218 return; 219 } 220 221 // emit styled digit-run... maybe 222 if (!is_ansi) { 223 span chunk; 224 chunk.ptr = line.ptr; 225 chunk.len = j; 226 restyle_digits(w, chunk, style); 227 } else { 228 fwrite(line.ptr, 1, j, w); 229 } 230 231 // skip right past the end of the digit-run 232 advance(&line, j); 233 } 234 } 235 236 // default_digits_style makes it easy to change the built-in default style 237 #ifdef COMPACT_OUTPUT 238 unsigned char default_digits_style[] = "\x1b[38;5;248m"; 239 #else 240 unsigned char default_digits_style[] = "\x1b[38;2;168;168;168m"; 241 #endif 242 243 typedef struct handler_args { 244 FILE* w; 245 slice* line; 246 span style; 247 } handler_args; 248 249 bool starts_with_bom(const unsigned char* p, size_t len) { 250 return len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf; 251 } 252 253 // handle_lines loops over input lines, restyling all digit-runs as more 254 // readable `nice numbers`, fulfilling the app's purpose 255 void handle_lines(handler_args args, FILE* src, bool live_lines) { 256 FILE* w = args.w; 257 slice* line = args.line; 258 259 for (size_t i = 0; !feof(w); i++) { 260 ssize_t len = getline((char**)&line->ptr, &line->cap, src); 261 if (line->ptr == NULL) { 262 fprintf(stderr, "\n"); 263 fprintf(stderr, ERROR_LINE("out of memory")); 264 exit(BAD_ALLOC); 265 } 266 267 if (len < 0) { 268 break; 269 } 270 271 unsigned char* ptr = line->ptr; 272 273 // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it 274 if (i == 0 && starts_with_bom(ptr, len)) { 275 ptr += 3; 276 len -= 3; 277 } 278 279 // replace trailing carriage-returns with line-feeds 280 if (len >= 1 && ptr[len - 1] == '\r') { 281 ptr[len - 1] = '\n'; 282 } 283 284 // get rid of carriage-returns preceding line-feeds 285 if (len >= 2 && ptr[len - 2] == '\r' && ptr[len - 1] == '\n') { 286 ptr[len - 2] = '\n'; 287 len--; 288 } 289 290 restyle_line(w, ptr, len, args.style); 291 if (len < 1 || ptr[len - 1] != '\n') { 292 fputc('\n', w); 293 } 294 if (live_lines) { 295 fflush(w); 296 } 297 } 298 299 if (!live_lines) { 300 fflush(w); 301 } 302 } 303 304 // handle_file handles data from the filename given; returns false only when 305 // the file can't be opened 306 bool handle_file(handler_args args, const char* path, bool live_lines) { 307 FILE* f = fopen(path, "rb"); 308 if (f == NULL) { 309 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 310 return false; 311 } 312 313 handle_lines(args, f, live_lines); 314 fclose(f); 315 return true; 316 } 317 318 const char* style_names_aliases[] = { 319 "b", "blue", 320 "g", "green", 321 "h", "inverse", 322 "i", "inverse", 323 "m", "magenta", 324 "o", "orange", 325 "p", "purple", 326 "r", "red", 327 "u", "underline", 328 329 "hi", "inverse", 330 "ma", "magenta", 331 "or", "orange", 332 "un", "underline", 333 334 "inv", "inverse", 335 "mag", "magenta", 336 337 "grey", "gray", 338 "highlight", "inverse", 339 "highlighted", "inverse", 340 "hilite", "inverse", 341 "hilited", "inverse", 342 "invert", "inverse", 343 "inverted", "inverse", 344 "underlined", "underline", 345 346 "bb", "blueback", 347 "gb", "greenback", 348 "mb", "magentaback", 349 "ob", "orangeback", 350 "pb", "purpleback", 351 "rb", "redback", 352 353 "greyback", "grayback", 354 }; 355 356 #ifdef COMPACT_OUTPUT 357 char* styles[] = { 358 "blue", "\x1b[38;5;26m", 359 "bold", "\x1b[1m", 360 "gray", "\x1b[38;5;248m", 361 "green", "\x1b[38;5;29m", 362 "inverse", "\x1b[7m", 363 "magenta", "\x1b[38;5;165m", 364 "orange", "\x1b[38;5;166m", 365 "purple", "\x1b[38;5;99m", 366 "red", "\x1b[38;5;1m", 367 "underline", "\x1b[4m", 368 369 "blueback", "\x1b[48;5;26m\x1b[38;5;15m", 370 "grayback", "\x1b[48;5;248m\x1b[38;5;15m", 371 "greenback", "\x1b[48;5;29m\x1b[38;5;15m", 372 "magentaback", "\x1b[48;5;165m\x1b[38;5;15m", 373 "orangeback", "\x1b[48;5;166m\x1b[38;5;15m", 374 "purpleback", "\x1b[48;5;99m\x1b[38;5;15m", 375 "redback", "\x1b[48;5;1m\x1b[38;5;15m", 376 }; 377 #else 378 char* styles[] = { 379 "blue", "\x1b[38;2;0;95;215m", 380 "bold", "\x1b[1m", 381 "gray", "\x1b[38;2;168;168;168m", 382 "green", "\x1b[38;2;0;135;95m", 383 "inverse", "\x1b[7m", 384 "magenta", "\x1b[38;2;215;0;255m", 385 "orange", "\x1b[38;2;215;95;0m", 386 "purple", "\x1b[38;2;135;95;255m", 387 "red", "\x1b[38;2;204;0;0m", 388 "underline", "\x1b[4m", 389 390 "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m", 391 "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m", 392 "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m", 393 "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m", 394 "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m", 395 "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m", 396 "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m", 397 }; 398 #endif 399 400 bool change_style(const char* arg, span* style) { 401 // style-changing options must have 1 or 2 leading dashes 402 if (arg[0] != '-') { 403 return false; 404 } 405 406 // skip up to 2 leading dashes 407 const char* s = arg + (arg[1] == '-' ? 2 : 1); 408 409 // resolve style-name aliases 410 const size_t n = sizeof(style_names_aliases) / sizeof(char*); 411 for (size_t i = 0; i < n; i += 2) { 412 if (strcmp(s, style_names_aliases[i]) == 0) { 413 s = style_names_aliases[i + 1]; 414 break; 415 } 416 } 417 418 // try to find ANSI-code for the style-name given 419 for (size_t i = 0; i < sizeof(styles) / sizeof(char*); i += 2) { 420 if (strcmp(s, styles[i]) == 0) { 421 style->ptr = (unsigned char*)styles[i + 1]; 422 style->len = strlen(styles[i + 1]); 423 return true; 424 } 425 } 426 427 return false; 428 } 429 430 // run returns the number of errors 431 int run(int argc, char** argv, FILE* w, bool live_lines) { 432 size_t files = 0; 433 size_t errors = 0; 434 435 slice line; 436 line.cap = 32 * 1024; 437 line.ptr = malloc(line.cap); 438 439 if (line.ptr == NULL) { 440 fprintf(stderr, ERROR_LINE("out of memory")); 441 exit(BAD_ALLOC); 442 } 443 444 handler_args args; 445 args.w = w; 446 args.line = &line; 447 args.style.ptr = default_digits_style; 448 args.style.len = strlen((char*)default_digits_style); 449 450 for (size_t i = 1; i < (size_t)argc && !feof(w); i++) { 451 const char* arg = argv[i]; 452 453 // `-` means standard input 454 if (arg[0] == '-' && arg[1] == 0) { 455 handle_lines(args, stdin, live_lines); 456 files++; 457 continue; 458 } 459 460 if (arg[0] == '-') { 461 if (!change_style(arg, &args.style)) { 462 fprintf(stderr, ERROR_LINE("unsupported style named %s"), arg); 463 errors++; 464 } 465 continue; 466 } 467 468 if (!handle_file(args, arg, live_lines)) { 469 errors++; 470 } 471 files++; 472 } 473 474 // use stdin when not given any filepaths 475 if (files == 0) { 476 handle_lines(args, stdin, live_lines); 477 } 478 479 free(line.ptr); 480 return errors; 481 } 482 483 // is_help_option simplifies control-flow for func main 484 bool is_help_option(const char* s) { 485 return (s[0] == '-') && ( 486 strcmp(s, "-h") == 0 || 487 strcmp(s, "-help") == 0 || 488 strcmp(s, "--h") == 0 || 489 strcmp(s, "--help") == 0 490 ); 491 } 492 493 int main(int argc, char** argv) { 494 #ifdef _WIN32 495 setmode(fileno(stdin), O_BINARY); 496 // ensure output lines end in LF instead of CRLF on windows 497 setmode(fileno(stdout), O_BINARY); 498 setmode(fileno(stderr), O_BINARY); 499 #endif 500 501 // handle any of the help options, if given 502 if (argc > 1 && is_help_option(argv[1])) { 503 printf("%s", info); 504 return 0; 505 } 506 507 const bool live_lines = lseek(fileno(stdout), 0, SEEK_CUR) != 0; 508 if (!live_lines) { 509 setvbuf(stdout, NULL, _IOFBF, 0); 510 } 511 return run(argc, argv, stdout, live_lines) == 0 ? 0 : 1; 512 }