/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./nn ./nn.c
*/

// getline is a POSIX.1-2008 function: ask for its declaration explicitly, so
// the app also builds when the compiler runs in a strict ISO C mode
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#ifdef _WIN32
#include <windows.h>
#endif

// uncomment the next line to make the style table use 256-color palette
// codes (38;5;N), instead of the 24-bit RGB codes (38;2;R;G;B)
// #define COMPACT_OUTPUT

// info is the message shown when this app is given any of its help options
const char* info = ""
"nn [options...] [filepaths...]\n"
"\n"
"\n"
"Nice Numbers is an app which renders the plain text it's given to make long\n"
"numbers much easier to read, by alternating 3-digit groups which are colored\n"
"using ANSI-codes with unstyled ones.\n"
"\n"
"Unlike the common practice of inserting commas between 3-digit groups, this\n"
"alternative doesn't widen the original text, keeping any alignments the same.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
" -blue use a blue-like color to alternate-style runs of digits\n"
" -bold use a bold style/effect to alternate-style runs of digits\n"
" -gray use a gray color to alternate-style runs of digits\n"
" -green use a green color to alternate-style runs of digits\n"
" -inverse invert/swap colors to alternate-style runs of digits\n"
" -orange use an orange color to alternate-style runs of digits\n"
" -purple use a purple color to alternate-style runs of digits\n"
" -red use a red color to alternate-style runs of digits\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"\n"
" -highlight same as option -inverse\n"
" -hilite same as option -inverse\n"
"";

// line_memory_error_msg is what's shown on stderr when the line buffer can't
// be allocated
const char* line_memory_error_msg =
    ""
    "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// init_slice is the constructor for type slice: it tries to allocate `cap`
// bytes; on failure `ptr` is NULL (callers must check it) and `cap` is 0, so
// the fields never describe memory which isn't there
void init_slice(slice* s, size_t cap) {
    s->ptr = malloc(cap);
    s->len = 0;
    s->cap = (s->ptr != NULL) ? cap : 0;
}

// advance updates a slice so it starts after the number of bytes given; the
// caller must ensure n doesn't exceed the slice's length; it's `static`
// because a plain `inline` definition gives the linker nothing to call when
// the compiler chooses not to inline, e.g. in unoptimized builds
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// find_digit returns the index of the first digit found, or a negative value
// on failure
long long int find_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if ('0' <= b && b <= '9') {
            return (long long int)i;
        }
    }
    return -1;
}

// find_non_digit returns the index of the first non-digit found, or a negative
// value on failure
long long int find_non_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if (b < '0' || b > '9') {
            return (long long int)i;
        }
    }
    return -1;
}

// reset_style is the ANSI-code emitted right after each styled digit-group
const unsigned char reset_style[] = "\x1b[0m";

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination
    bool done;
} bufwriter;

// init_bufwriter is the constructor for type bufwriter; the buffer memory is
// provided (and owned) by the caller
void init_bufwriter(bufwriter* w, FILE* dst, unsigned char* buf, size_t cap) {
    w->buf = buf;
    w->len = 0;
    w->cap = cap;
    w->out = dst;
    w->done = false;
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; a failed write marks the writer as done
void flush(bufwriter* w) {
    if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
        w->done = true;
    }
    w->len = 0;
}

// write_bytes does as it says, minimizing the number of calls to fwrite
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    if (w->len + len < w->cap) {
        // all bytes fit into buffer
        memcpy(w->buf + w->len, src, len);
        w->len += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);

    // emit all chunks striding beyond/at the buffer's capacity
    for (; len >= w->cap; src += w->cap, len -= w->cap) {
        if (fwrite(src, w->cap, 1, w->out) < 1) {
            w->done = true;
            return;
        }
    }

    // now all, if any, remaining bytes will fit into the buffer
    memcpy(w->buf, src, len);
    w->len += len;
}

// write_byte does as it says, flushing first when the buffer is full
void write_byte(bufwriter* w, unsigned char b) {
    if (w->len >= w->cap) {
        flush(w);
    }
    w->buf[w->len] = b;
    w->len++;
}

// restyle_digits renders a run of digits as alternating styled/unstyled runs
// of 3 digits, which greatly improves readability, and is the only purpose
// of this app; string is assumed to be all decimal digits
void restyle_digits(bufwriter* w, slice digits, slice style) {
    if (digits.len < 4) {
        // digit sequence is short, so emit it as is
        write_bytes(w, digits.ptr, digits.len);
        return;
    }

    // separate leading 0..2 digits which don't align with the 3-digit groups
    const size_t lead = digits.len % 3;
    // emit leading digits unstyled, if there are any
    write_bytes(w, digits.ptr, lead);
    // the rest is guaranteed to have a length which is a multiple of 3
    advance(&digits, lead);

    // start with the alternate style, unless there were no leading digits
    bool style_now = lead != 0;

    while (digits.len > 0) {
        if (style_now) {
            write_bytes(w, style.ptr, style.len);
            write_bytes(w, digits.ptr, 3);
            write_bytes(w, reset_style, sizeof(reset_style) - 1);
        } else {
            write_bytes(w, digits.ptr, 3);
        }

        advance(&digits, 3);
        // alternate between styled and unstyled 3-digit groups
        style_now = !style_now;
    }
}

// restyle_line renders the line given, using ANSI-styles to make any long
// numbers in it more legible; output is left in the buffered-writer, so
// callers are expected to flush it when they need the bytes to be out
void restyle_line(bufwriter* w, slice line, slice alt_style) {
    while (!w->done && line.len > 0) {
        // indices are kept as wide as the finders' results, since `long` is
        // only 32 bits on some 64-bit platforms
        const long long int i = find_digit(line);
        if (i < 0) {
            // no (more) digits for sure
            write_bytes(w, line.ptr, line.len);
            return;
        }

        // some ANSI-style sequences use 4-digit numbers, which are long
        // enough for this app to mangle
        const unsigned char* p = line.ptr;
        const bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';

        // emit line before current digit-run
        write_bytes(w, line.ptr, (size_t)i);
        advance(&line, (size_t)i);

        // see where the digit-run ends: it may go until the end of the line
        const long long int j = find_non_digit(line);
        slice digits;
        digits.ptr = line.ptr;
        digits.len = (j < 0) ? line.len : (size_t)j;
        digits.cap = digits.len;

        // emit styled digit-run... maybe
        if (!is_ansi) {
            restyle_digits(w, digits, alt_style);
        } else {
            write_bytes(w, digits.ptr, digits.len);
        }

        // skip right past the end of the digit-run: when the run reached the
        // end of the line, this empties the slice and ends the loop
        advance(&line, digits.len);
    }
}

// default_digits_style makes it easy to change the built-in default style
const unsigned char default_digits_style[] = "\x1b[38;5;248m";

// handler_args bundles what the line-handling funcs need
typedef struct handler_args {
    // w is the buffered-writer all output goes to
    bufwriter* w;

    // line is the reusable buffer input lines are read into
    slice* line;

    // style is the ANSI-code used to style alternating digit-groups
    slice style;
} handler_args;

// bom_start checks if the slice given starts with the 3-byte UTF-8 BOM
// (byte-order mark), which is EF BB BF
bool bom_start(slice s) {
    const unsigned char* p = s.ptr;
    return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
}

// handle_lines loops over input lines, restyling all digit-runs as more
// readable `nice numbers`, fulfilling the app's purpose; it stops at the
// end of the input, or as soon as the output no longer accepts data, and
// currently always returns true
bool handle_lines(handler_args args, FILE* src) {
    bufwriter* w = args.w;
    slice* line = args.line;

    for (size_t i = 0; !w->done; i++) {
        // go through a local `char*`, since getline may move the buffer
        char* buf = (char*)line->ptr;
        const ssize_t got = getline(&buf, &line->cap, src);
        line->ptr = (unsigned char*)buf;
        if (got < 0) {
            break;
        }
        if (line->ptr == NULL) {
            fputs(line_memory_error_msg, stderr);
            exit(1);
        }

        line->len = (size_t)got;

        slice trimmed;
        trimmed.ptr = line->ptr;
        trimmed.len = line->len;
        trimmed.cap = line->cap;

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it;
        // the length must shrink along with the start moving forward
        if (i == 0 && bom_start(trimmed)) {
            advance(&trimmed, 3);
        }

        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        const unsigned char* p = trimmed.ptr;
        const size_t n = trimmed.len;
        if (n >= 2 && p[n - 2] == '\r' && p[n - 1] == '\n') {
            trimmed.len -= 2;
        } else if (n >= 1 && p[n - 1] == '\n') {
            trimmed.len--;
        }

        restyle_line(w, trimmed, args.style);
        write_byte(w, '\n');
        // emit each output line as soon as it's ready
        flush(w);
    }

    flush(w);
    return true;
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(handler_args args, const char* path) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    const bool ok = handle_lines(args, f);
    fclose(f);
    return ok;
}

// style_names_aliases holds pairs of strings: each alias is followed by the
// style name it stands for
const char *style_names_aliases[] = {
    "b", "blue",
    "g", "green",
    "h", "inverse",
    "i", "inverse",
    "m", "magenta",
    "o", "orange",
    "p", "purple",
    "r", "red",
    "u", "underline",

    "hi", "inverse",
    "ma", "magenta",
    "or", "orange",
    "un", "underline",

    "inv", "inverse",
    "mag", "magenta",

    "grey", "gray",
    "highlight", "inverse",
    "highlighted", "inverse",
    "hilite", "inverse",
    "hilited", "inverse",
    "invert", "inverse",
    "inverted", "inverse",
    "underlined", "underline",

    "bb", "blueback",
    "gb", "greenback",
    "mb", "magentaback",
    "ob", "orangeback",
    "pb", "purpleback",
    "rb", "redback",

    "greyback", "grayback",
};

// styles holds pairs of strings: each style name is followed by its ANSI-code
#ifdef COMPACT_OUTPUT
const char *styles[] = {
    "blue", "\x1b[38;5;26m",
    "bold", "\x1b[1m",
    "gray", "\x1b[38;5;248m",
    "green", "\x1b[38;5;29m",
    "inverse", "\x1b[7m",
    "magenta", "\x1b[38;5;165m",
    "orange", "\x1b[38;5;166m",
    "purple", "\x1b[38;5;99m",
    "red", "\x1b[38;5;1m",
    "underline", "\x1b[4m",

    "blueback", "\x1b[48;5;26m\x1b[38;5;15m",
    "grayback", "\x1b[48;5;248m\x1b[38;5;15m",
    "greenback", "\x1b[48;5;29m\x1b[38;5;15m",
    "magentaback", "\x1b[48;5;165m\x1b[38;5;15m",
    "orangeback", "\x1b[48;5;166m\x1b[38;5;15m",
    "purpleback", "\x1b[48;5;99m\x1b[38;5;15m",
    "redback", "\x1b[48;5;1m\x1b[38;5;15m",
};
#else
const char *styles[] = {
    "blue", "\x1b[38;2;0;95;215m",
    "bold", "\x1b[1m",
    "gray", "\x1b[38;2;168;168;168m",
    "green", "\x1b[38;2;0;135;95m",
    "inverse", "\x1b[7m",
    "magenta", "\x1b[38;2;215;0;255m",
    "orange", "\x1b[38;2;215;95;0m",
    "purple", "\x1b[38;2;135;95;255m",
    "red", "\x1b[38;2;204;0;0m",
    "underline", "\x1b[4m",

    "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
    "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
    "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
    "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
    "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
    "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
    "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
};
#endif

// run handles the optional leading style option, then all the inputs named
// by the remaining arguments, or stdin when there are none; it returns the
// number of errors
size_t run(int argc, char** argv, slice* line) {
    const unsigned char* style = default_digits_style;

    bufwriter w;
    unsigned char obuf[48 * 1024];
    init_bufwriter(&w, stdout, obuf, sizeof(obuf));

    // handle leading options to change the ANSI-style used; a lone dash is
    // not an option, as it means `read from stdin` when handling inputs
    size_t start = 1;
    if (argc > 1 && argv[start][0] == '-' && argv[start][1] != '\0') {
        const char* s = argv[start] + (argv[start][1] == '-' ? 2 : 1);

        // resolve style-name aliases
        const size_t n = sizeof(style_names_aliases) /
            sizeof(style_names_aliases[0]);
        for (size_t i = 0; i < n; i += 2) {
            if (strcmp(s, style_names_aliases[i]) == 0) {
                s = style_names_aliases[i + 1];
                break;
            }
        }

        // find ANSI-code for the style-name given
        bool found = false;
        for (size_t i = 0; i < sizeof(styles) / sizeof(styles[0]); i += 2) {
            if (strcmp(s, styles[i]) == 0) {
                start++;
                found = true;
                style = (const unsigned char*)styles[i + 1];
                break;
            }
        }

        if (!found) {
            fprintf(stderr, "\x1b[31munsupported style named %s\x1b[0m\n", s);
            return 1;
        }
    }

    size_t errors = 0;
    handler_args args;
    args.w = &w;
    args.line = line;
    // slices point to writable bytes, but the style's bytes are only read
    args.style.ptr = (unsigned char*)style;
    args.style.len = strlen((const char*)style);
    args.style.cap = args.style.len;

    // use stdin when not given any filepaths
    if ((size_t)argc <= start) {
        if (!handle_lines(args, stdin)) {
            errors++;
        }
        return errors;
    }

    for (size_t i = start; i < (size_t)argc && !w.done; i++) {
        if (i > start) {
            // put an extra empty line between adjacent outputs
            write_byte(&w, '\n');
        }

        // a lone dash stands for the standard input
        if (argv[i][0] == '-' && argv[i][1] == 0) {
            if (!handle_lines(args, stdin)) {
                errors++;
            }
            continue;
        }

        if (!handle_file(args, argv[i])) {
            errors++;
        }
    }

    return errors;
}

// is_help_option simplifies control-flow for func main
bool is_help_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
    );
}

int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && is_help_option(argv[1])) {
        puts(info);
        return 0;
    }

    // disable automatic stdio buffering for the outputs, in favor of explicit
    // buffering; stdin keeps its default buffering, since nothing else in
    // this app buffers input, and getline would otherwise have to make do
    // with an unbuffered stream
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    slice line;
    init_slice(&line, 16 * 1024);
    if (line.ptr == NULL) {
        fputs(line_memory_error_msg, stderr);
        return 1;
    }

    const int res = run(argc, argv, &line) == 0 ? 0 : 1;
    free(line.ptr);
    return res;
}