// File: nn.c

/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./nn ./nn.c
*/

#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
// declares setmode, which main uses to put the standard streams in binary mode
#include <io.h>
#endif

/*
The info-message string below was made by running the command

awk 'BEGIN { print "const char* info = \"\"" }
{ printf "\"%s\\n\"\n", $0 }
END { print "\"\";" }' info.txt
*/

// info is the message shown when this app is given any of its help options
const char* info =
""
"nn [options...] [filepaths...]\n"
"\n"
"\n"
"Nice Numbers is an app which renders the plain text it's given to make long\n"
"numbers much easier to read, by alternating 3-digit groups which are colored\n"
"using ANSI-codes with unstyled ones.\n"
"\n"
"Unlike the common practice of inserting commas between 3-digit groups, this\n"
"alternative doesn't widen the original text, keeping any alignments the same.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
" -blue use a blue-like color to alternate-style runs of digits\n"
" -bold use a bold style/effect to alternate-style runs of digits\n"
" -gray use a gray color to alternate-style runs of digits\n"
" -green use a green color to alternate-style runs of digits\n"
" -inverse invert/swap colors to alternate-style runs of digits\n"
" -orange use an orange color to alternate-style runs of digits\n"
" -purple use a purple color to alternate-style runs of digits\n"
" -red use a red color to alternate-style runs of digits\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"\n"
" -highlight same as option -inverse\n"
" -hilite same as option -inverse\n"
"";

// die_out_of_memory reports a failed allocation on stderr and ends the app
// right away with a failure exit code, which is the explicit out-of-memory
// policy for all the buffers this app allocates
static void die_out_of_memory(void) {
    fputs("\x1b[31mout of memory\x1b[0m\n", stderr);
    exit(1);
}

// checked_malloc works like malloc, except it never returns NULL for
// non-empty requests, ending the app instead
static void* checked_malloc(size_t n) {
    void* p = malloc(n);
    if (p == NULL && n > 0) {
        die_out_of_memory();
    }
    return p;
}

// checked_realloc works like realloc for non-empty requests, except it ends
// the app on failure, instead of returning NULL and leaking the old region
static void* checked_realloc(void* p, size_t n) {
    void* res = realloc(p, n);
    if (res == NULL) {
        die_out_of_memory();
    }
    return res;
}

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// new_slice is the constructor for type slice: the result is empty, but has
// room for the number of bytes given; the caller is meant to free its ptr
slice new_slice(size_t cap) {
    slice res;
    res.cap = cap;
    res.len = 0;
    res.ptr = checked_malloc(res.cap);
    return res;
}

// advance updates a slice so it starts after the number of bytes given; the
// caller must ensure n isn't more than the slice's current length
//
// this is `static inline` because a plain `inline` definition doesn't provide
// an external definition in C99 and later, so any call the compiler chooses
// not to inline (for example when building without optimizations) would fail
// to link
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// first creates a slice ending at the number of bytes given
slice first(slice src, size_t n) {
    src.len = n;
    return src;
}

// append_byte does as it says, potentially reallocating the memory area
// backing the slice given; the app ends if more memory can't be allocated
void append_byte(slice* s, unsigned char b) {
    if (s->len >= s->cap) {
        // slice is full, so double it and reallocate: doubling must not
        // wrap around, and an empty slice has nothing to double, so it
        // starts from a small fixed capacity instead
        if (s->cap > SIZE_MAX / 2) {
            die_out_of_memory();
        }
        const size_t cap = s->cap > 0 ? 2 * s->cap : 16;
        s->ptr = checked_realloc(s->ptr, cap);
        s->cap = cap;
    }

    // now there's room for sure
    s->ptr[s->len] = b;
    s->len++;
}

// find_lf returns the index of the first line-feed found, or a negative value
// on failure
long long int find_lf(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        if (s.ptr[i] == '\n') {
            return i;
        }
    }
    return -1;
}

// find_digit returns the index of the first digit found, or a negative value
// on failure
long long int find_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if ('0' <= b && b <= '9') {
            return i;
        }
    }
    return -1;
}

// find_non_digit returns the index of the first non-digit found, or a negative
// value on failure
long long int find_non_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if (b < '0' || b > '9') {
            return i;
        }
    }
    return -1;
}

// reset_style is the ANSI-code emitted right after each styled digit-group
const unsigned char reset_style[] = "\x1b[0m";

// bufreader is a way to speed up reading data by reducing the frequency of
// data reads from a data source, while still allowing reading 1 byte at
// a time
typedef struct bufreader {
    // buf is the buffer, (re)filled periodically as needed
    unsigned char* buf;

    // len is how many buffer bytes are being used, out of its max capacity
    size_t len;

    // cap is the buffer's capacity, or the most bytes it can hold at once
    size_t cap;

    // pos is the current position, up to the current buffer length
    size_t pos;

    // src is the data source used to fill the buffer
    FILE* src;
} bufreader;

// new_bufreader is the constructor for type bufreader: the source given isn't
// owned by the result, so closing the reader doesn't close the source
bufreader new_bufreader(FILE* src, size_t cap) {
    bufreader res;
    // a buffer with no room could never be refilled
    res.cap = cap > 0 ? cap : 1;
    res.len = 0;
    res.pos = 0;
    res.src = src;
    res.buf = checked_malloc(res.cap);
    return res;
}

// close_bufreader deallocates the buffer, leaving the data source open
void close_bufreader(bufreader* r) {
    free(r->buf);
    r->buf = NULL;
    r->len = 0;
}

// read_byte does as it says: check its return for the value EOF, before
// using it as the next byte
int read_byte(bufreader* r) {
    if (r->pos >= r->len) {
        // current chunk is over, so try to read the next one
        r->pos = 0;
        r->len = fread(r->buf, sizeof(unsigned char), r->cap, r->src);
        if (r->len == 0) {
            // reached the end of data, or reading failed
            return EOF;
        }
    }

    // inside current chunk
    const unsigned char b = r->buf[r->pos];
    r->pos++;
    return b;
}

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination
    bool done;
} bufwriter;

// new_bufwriter is the constructor for type bufwriter: the destination given
// isn't owned by the result, so closing the writer doesn't close it
bufwriter new_bufwriter(FILE* dst, size_t cap) {
    bufwriter res;
    // write_byte needs room for at least 1 byte right after a flush
    res.cap = cap > 0 ? cap : 1;
    res.done = false;
    res.len = 0;
    res.out = dst;
    res.buf = checked_malloc(res.cap);
    return res;
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; a failed write marks the writer as done
void flush(bufwriter* w) {
    if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
        w->done = true;
    }
    w->len = 0;
}

// close_bufwriter ensures all output is shown and deallocates the buffer
void close_bufwriter(bufwriter* w) {
    flush(w);
    free(w->buf);
    w->buf = NULL;
}

// write_bytes does as it says, minimizing the number of calls to fwrite; a
// failed write marks the writer as done
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    if (len == 0) {
        // nothing to do, and memcpy is never given a possibly-null source
        return;
    }

    // the buffer never holds more than its capacity, so this subtraction
    // can't wrap around, unlike adding both lengths
    if (len < w->cap - w->len) {
        // all bytes fit into buffer
        memcpy(w->buf + w->len, src, len);
        w->len += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);

    // emit all chunks striding beyond/at the buffer's capacity
    for (; len >= w->cap; src += w->cap, len -= w->cap) {
        if (fwrite(src, w->cap, 1, w->out) < 1) {
            w->done = true;
            return;
        }
    }

    // now all, if any, remaining bytes will fit into the just-emptied buffer
    memcpy(w->buf, src, len);
    w->len = len;
}

// write_byte does as it says
void write_byte(bufwriter* w, unsigned char b) {
    if (w->len >= w->cap) {
        flush(w);
    }
    w->buf[w->len] = b;
    w->len++;
}

// restyle_digits renders a run of digits as alternating styled/unstyled runs
// of 3 digits, which greatly improves readability, and is the only purpose
// of this app; the slice is assumed to be all decimal digits, and the style
// is a null-terminated ANSI-code
void restyle_digits(bufwriter* w, slice digits, const unsigned char* style) {
    if (digits.len < 4) {
        // digit sequence is short, so emit it as is
        write_bytes(w, digits.ptr, digits.len);
        return;
    }

    // separate leading 0..2 digits which don't align with the 3-digit groups
    const size_t lead = digits.len % 3;
    // emit leading digits unstyled, if there are any
    write_bytes(w, digits.ptr, lead);
    // the rest is guaranteed to have a length which is a multiple of 3
    advance(&digits, lead);

    const size_t style_len = strlen((const char*)style);
    // start with the alternate style, unless there were no leading digits
    bool style_now = lead != 0;

    while (digits.len > 0) {
        if (style_now) {
            write_bytes(w, style, style_len);
            write_bytes(w, digits.ptr, 3);
            write_bytes(w, reset_style, sizeof(reset_style) - 1);
        } else {
            write_bytes(w, digits.ptr, 3);
        }

        advance(&digits, 3);
        // alternate between styled and unstyled 3-digit groups
        style_now = !style_now;
    }
}

// restyle_line renders the line given, using ANSI-styles to make any long
// numbers in it more legible; digit-runs which come right after the 2-byte
// sequence ESC [ are emitted unchanged
void restyle_line(bufwriter* w, slice line, const unsigned char* alt_style) {
    while (!w->done && line.len > 0) {
        // indices keep the full width the finders return, since type long
        // is narrower than that on some platforms
        const long long i = find_digit(line);
        if (i < 0) {
            // no (more) digits for sure
            write_bytes(w, line.ptr, line.len);
            return;
        }

        // some ANSI-style sequences use 4-digit numbers, which are long
        // enough for this app to mangle
        const unsigned char* p = line.ptr;
        const bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';

        // emit line before current digit-run
        write_bytes(w, line.ptr, (size_t)i);

        advance(&line, (size_t)i);

        // see where the digit-run ends: a negative index means the digit-run
        // goes until the end of the line
        const long long j = find_non_digit(line);
        const bool last = j < 0;

        slice run = line;
        if (!last) {
            run.len = (size_t)j;
        }

        // emit styled digit-run... maybe
        if (!is_ansi) {
            restyle_digits(w, run, alt_style);
        } else {
            write_bytes(w, run.ptr, run.len);
        }

        if (last) {
            return;
        }

        // skip right past the end of the digit-run
        advance(&line, run.len);
    }
}

// default_digits_style makes it easy to change the built-in default style
const unsigned char default_digits_style[] = "\x1b[38;5;248m";

// buffer_size is trying to be a good value for modern CPU cores
const size_t buffer_size = 32 * 1024;

// handle_reader loops over input lines, restyling all digit-runs as more
// readable `nice numbers`, fulfilling the app's purpose; the source given
// is read until its end, but isn't closed
void handle_reader(bufwriter* w, FILE* src, const unsigned char* style) {
    bufreader r = new_bufreader(src, buffer_size);
    slice line = new_slice(buffer_size);

    while (!w->done) {
        const int v = read_byte(&r);
        if (v == EOF) {
            // input is over
            break;
        }

        // still more bytes to go
        const unsigned char b = (unsigned char)v;
        append_byte(&line, b);

        if (b == '\n') {
            // end of line, whose line-feed is part of what's restyled
            restyle_line(w, line, style);
            line.len = 0;
        }
    }

    // any bytes left over are a last line which didn't end with a line-feed;
    // an empty input has no such line, so nothing is emitted for it
    const bool unterminated = line.len > 0;

    // don't forget the last line
    restyle_line(w, line, style);

    // ensure last output line ends with a line-feed since, at least on
    // msys/windows, `less` hangs when lines with millions of symbols
    // don't end with a lf
    if (unterminated) {
        write_byte(w, '\n');
    }

    close_bufreader(&r);
    free(line.ptr);
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(bufwriter* w, char* fname, const unsigned char* style) {
    FILE* f = fopen(fname, "rb");
    if (f == NULL) {
        // ensure currently-buffered/deferred output shows up right now: not
        // doing so may scramble results in the common case where stdout and
        // stderr are the same, thus confusing users
        flush(w);

        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
        return false;
    }

    handle_reader(w, f, style);
    fclose(f);
    return true;
}

// style_option pairs an option name, without its leading dashes, with the
// ANSI-code it selects
typedef struct style_option {
    const char* name;
    const char* style;
} style_option;

// style_options has all the style-changing options the app supports
static const style_option style_options[] = {
    {"blue", "\x1b[38;5;26m"},
    {"bold", "\x1b[1m"},
    {"green", "\x1b[38;5;29m"},
    {"gray", "\x1b[38;5;248m"},
    {"highlight", "\x1b[7m"},
    {"hilite", "\x1b[7m"},
    {"inverse", "\x1b[7m"},
    {"invert", "\x1b[7m"},
    {"orange", "\x1b[38;5;166m"},
    {"purple", "\x1b[38;5;99m"},
    {"red", "\x1b[31m"},
};

// run handles the optional leading style option, then restyles either all
// the files given, or the standard input when no filepaths are given; it
// returns the number of errors, which is how many files couldn't be opened
size_t run(int argc, char** argv) {
    const char* style = (const char*)default_digits_style;
    bufwriter w = new_bufwriter(stdout, buffer_size);

    // handle leading options to change the ANSI-style used: only the first
    // argument can be an option, and it's skipped only when it's a known one
    size_t start = 1;
    if (argc > 1 && argv[start][0] == '-') {
        // options can start with either 1 or 2 dashes
        const char* s = argv[start] + (argv[start][1] == '-' ? 2 : 1);
        const size_t n = sizeof(style_options) / sizeof(style_options[0]);

        for (size_t i = 0; i < n; i++) {
            if (strcmp(s, style_options[i].name) == 0) {
                style = style_options[i].style;
                start++;
                break;
            }
        }
    }

    const unsigned char* alt_style = (const unsigned char*)style;

    // use stdin when not given any filepaths
    if ((size_t)argc <= start) {
        handle_reader(&w, stdin, alt_style);
        close_bufwriter(&w);
        return 0;
    }

    size_t errors = 0;
    for (size_t i = start; i < (size_t)argc && !w.done; i++) {
        if (i > start) {
            // put an extra empty line between adjacent outputs
            write_byte(&w, '\n');
        }

        if (!handle_file(&w, argv[i], alt_style)) {
            errors++;
        }
    }

    close_bufwriter(&w);
    return errors;
}

// main shows the help message when the first argument is one of the help
// options, otherwise it runs the app proper; the exit code is 0 on success,
// and 1 when any errors happened
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && argv[1][0] == '-') {
        const char* s = argv[1] + (argv[1][1] == '-' ? 2 : 1);
        if (strcmp(s, "h") == 0 || strcmp(s, "help") == 0) {
            puts(info);
            return 0;
        }
    }

    // disable automatic stdio buffering, in favor of explicit buffering
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    return run(argc, argv) == 0 ? 0 : 1;
}