/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

    cc -Wall -s -O2 -o ./nn ./nn.c

Building with COMPACT_OUTPUT defined makes `nn` output many fewer bytes, at
the cost of using arguably worse colors. You can do that by running

    cc -Wall -s -O2 -D COMPACT_OUTPUT -o ./nn ./nn.c
*/

// getline is a POSIX.1-2008 function: asking for it explicitly, before any
// header is included, lets strict builds such as `cc -std=c11` declare it
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#endif

// #define COMPACT_OUTPUT

// info is the message shown when this app is given any of its help options
const char* info = ""
"nn [options...] [filepaths...]\n"
"\n"
"\n"
"Nice Numbers is an app which renders the plain text it's given to make long\n"
"numbers much easier to read, by alternating 3-digit groups which are colored\n"
"using ANSI-codes with unstyled ones.\n"
"\n"
"Unlike the common practice of inserting commas between 3-digit groups, this\n"
"alternative doesn't widen the original text, keeping any alignments the same.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
" -blue use a blue-like color to alternate-style runs of digits\n"
" -bold use a bold style/effect to alternate-style runs of digits\n"
" -gray use a gray color to alternate-style runs of digits\n"
" -green use a green color to alternate-style runs of digits\n"
" -inverse invert/swap colors to alternate-style runs of digits\n"
" -orange use an orange color to alternate-style runs of digits\n"
" -purple use a purple color to alternate-style runs of digits\n"
" -red use a red color to alternate-style runs of digits\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"\n"
" -highlight same as option -inverse\n"
" -hilite same as option -inverse\n"
"";

// line_memory_error_msg is shown when the line buffer can't be (re)allocated
const char* line_memory_error_msg =
    ""
    "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// init_slice is the constructor for type slice: it tries to allocate `cap`
// bytes and leaves the slice empty; on allocation failure ptr is NULL and
// the capacity is reported as 0, so callers must check ptr before using it
void init_slice(slice* s, size_t cap) {
    s->ptr = malloc(cap);
    s->len = 0;
    s->cap = s->ptr != NULL ? cap : 0;
}

// advance updates a slice so it starts after the number of bytes given;
// callers must ensure n doesn't exceed the slice's current length
//
// this is `static inline`, since a plain `inline` definition provides no
// external definition in C99/C11, which fails to link when not inlined
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// write_bytes emits the `len` bytes starting at `src` to the stream given;
// failures are left for callers to detect via ferror
static inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    if (len > 0) {
        fwrite(src, len, 1, w);
    }
}

// find_digit returns the index of the first digit found, or a negative value
// on failure
long long int find_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if ('0' <= b && b <= '9') {
            return (long long int)i;
        }
    }
    return -1;
}

// find_non_digit returns the index of the first non-digit found, or a negative
// value on failure
long long int find_non_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if (b < '0' || b > '9') {
            return (long long int)i;
        }
    }
    return -1;
}

// reset_style is the ANSI-code which ends any styled group of digits
const unsigned char reset_style[] = "\x1b[0m";

// restyle_digits renders a run of digits as alternating styled/unstyled runs
// of 3 digits, which greatly improves readability, and is the only purpose
// of this app; string is assumed to be all decimal digits
void restyle_digits(FILE* w, slice digits, slice style) {
    if (digits.len < 4) {
        // digit sequence is short, so emit it as is
        write_bytes(w, digits.ptr, digits.len);
        return;
    }

    // separate leading 0..2 digits which don't align with the 3-digit groups
    const size_t lead = digits.len % 3;
    // emit leading digits unstyled, if there are any
    write_bytes(w, digits.ptr, lead);
    // the rest is guaranteed to have a length which is a multiple of 3
    advance(&digits, lead);

    // start with the alternate style, unless there were no leading digits
    bool style_now = lead != 0;

    while (digits.len > 0) {
        if (style_now) {
            write_bytes(w, style.ptr, style.len);
            write_bytes(w, digits.ptr, 3);
            write_bytes(w, reset_style, sizeof(reset_style) - 1);
        } else {
            write_bytes(w, digits.ptr, 3);
        }

        advance(&digits, 3);
        // alternate between styled and unstyled 3-digit groups
        style_now = !style_now;
    }
}

// restyle_line renders the line given, using ANSI-styles to make any long
// numbers in it more legible; it stops early if the output stream has failed
void restyle_line(FILE* w, slice line, slice alt_style) {
    // the end-of-file indicator is never set by writing, so output trouble
    // is detected using the stream's error indicator instead
    while (!ferror(w) && line.len > 0) {
        // keep the full width of the result: a plain `long` can be 32-bit
        const long long int i = find_digit(line);
        if (i < 0) {
            // no (more) digits for sure
            write_bytes(w, line.ptr, line.len);
            return;
        }

        // some ANSI-style sequences use 4-digit numbers, which are long
        // enough for this app to mangle
        const unsigned char* p = line.ptr;
        const bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';

        // emit line before current digit-run
        write_bytes(w, line.ptr, (size_t)i);

        advance(&line, (size_t)i);

        // see where the digit-run ends
        const long long int j = find_non_digit(line);
        if (j < 0) {
            // the digit-run goes until the end
            if (!is_ansi) {
                restyle_digits(w, line, alt_style);
            } else {
                write_bytes(w, line.ptr, line.len);
            }
            return;
        }

        // emit styled digit-run... maybe
        if (!is_ansi) {
            slice s;
            s.ptr = line.ptr;
            s.len = (size_t)j;
            s.cap = (size_t)j;
            restyle_digits(w, s, alt_style);
        } else {
            write_bytes(w, line.ptr, (size_t)j);
        }

        // skip right past the end of the digit-run
        advance(&line, (size_t)j);
    }
}

// default_digits_style makes it easy to change the built-in default style
unsigned char default_digits_style[] = "\x1b[38;5;248m";

// handler_args bundles what all input-handlers need
typedef struct handler_args {
    // w is where all restyled output goes
    FILE* w;

    // line is the reusable buffer lines are read into
    slice* line;

    // style is the ANSI-code used for the styled 3-digit groups
    slice style;
} handler_args;

// bom_start checks if the slice given starts with a UTF-8 byte-order mark
bool bom_start(slice s) {
    const unsigned char* p = s.ptr;
    return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
}

// handle_lines loops over input lines, restyling all digit-runs as more
// readable `nice numbers`, fulfilling the app's purpose; each output line
// always ends with a single line-feed; the app quits right away when the
// line buffer can't grow, and the result is always true otherwise
bool handle_lines(handler_args args, FILE* src) {
    FILE* w = args.w;
    slice* line = args.line;

    // stop reading as soon as the output stream is known to have failed
    for (size_t i = 0; !ferror(w); i++) {
        // go through a proper `char*`, instead of casting the address of an
        // `unsigned char*`; the buffer may be moved even when getline fails,
        // so the slice is always updated right after the call
        char* buf = (char*)line->ptr;
        errno = 0;
        const long long int got = getline(&buf, &line->cap, src);
        line->ptr = (unsigned char*)buf;

        if (got < 0) {
            if (errno == ENOMEM) {
                fputs(line_memory_error_msg, stderr);
                exit(1);
            }
            // end of input, or an input error
            break;
        }

        line->len = (size_t)got;

        slice trimmed;
        trimmed.ptr = line->ptr;
        trimmed.len = line->len;
        trimmed.cap = line->len;

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (i == 0 && bom_start(trimmed)) {
            advance(&trimmed, 3);
        }

        const unsigned char* p = trimmed.ptr;
        const size_t n = trimmed.len;
        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        if (n >= 2 && p[n - 2] == '\r' && p[n - 1] == '\n') {
            trimmed.len -= 2;
        } else if (n >= 1 && p[n - 1] == '\n') {
            trimmed.len--;
        }

        restyle_line(w, trimmed, args.style);
        putc('\n', w);
    }

    return true;
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(handler_args args, const char* path) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    const bool ok = handle_lines(args, f);
    fclose(f);
    return ok;
}

// style_names_aliases has pairs of strings: each alias is followed by the
// style-name it stands for
const char *style_names_aliases[] = {
    "b", "blue",
    "g", "green",
    "h", "inverse",
    "i", "inverse",
    "m", "magenta",
    "o", "orange",
    "p", "purple",
    "r", "red",
    "u", "underline",

    "hi", "inverse",
    "ma", "magenta",
    "or", "orange",
    "un", "underline",

    "inv", "inverse",
    "mag", "magenta",

    "grey", "gray",
    "highlight", "inverse",
    "highlighted", "inverse",
    "hilite", "inverse",
    "hilited", "inverse",
    "invert", "inverse",
    "inverted", "inverse",
    "underlined", "underline",

    "bb", "blueback",
    "gb", "greenback",
    "mb", "magentaback",
    "ob", "orangeback",
    "pb", "purpleback",
    "rb", "redback",

    "greyback", "grayback",
};

// styles has pairs of strings: each style-name is followed by its ANSI-code;
// entries point to string literals, so they're declared read-only
#ifdef COMPACT_OUTPUT
const char *styles[] = {
    "blue", "\x1b[38;5;26m",
    "bold", "\x1b[1m",
    "gray", "\x1b[38;5;248m",
    "green", "\x1b[38;5;29m",
    "inverse", "\x1b[7m",
    "magenta", "\x1b[38;5;165m",
    "orange", "\x1b[38;5;166m",
    "purple", "\x1b[38;5;99m",
    "red", "\x1b[38;5;1m",
    "underline", "\x1b[4m",

    "blueback", "\x1b[48;5;26m\x1b[38;5;15m",
    "grayback", "\x1b[48;5;248m\x1b[38;5;15m",
    "greenback", "\x1b[48;5;29m\x1b[38;5;15m",
    "magentaback", "\x1b[48;5;165m\x1b[38;5;15m",
    "orangeback", "\x1b[48;5;166m\x1b[38;5;15m",
    "purpleback", "\x1b[48;5;99m\x1b[38;5;15m",
    "redback", "\x1b[48;5;1m\x1b[38;5;15m",
};
#else
const char *styles[] = {
    "blue", "\x1b[38;2;0;95;215m",
    "bold", "\x1b[1m",
    "gray", "\x1b[38;2;168;168;168m",
    "green", "\x1b[38;2;0;135;95m",
    "inverse", "\x1b[7m",
    "magenta", "\x1b[38;2;215;0;255m",
    "orange", "\x1b[38;2;215;95;0m",
    "purple", "\x1b[38;2;135;95;255m",
    "red", "\x1b[38;2;204;0;0m",
    "underline", "\x1b[4m",

    "blueback", "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
    "grayback", "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
    "greenback", "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
    "magentaback", "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
    "orangeback", "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
    "purpleback", "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
    "redback", "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
};
#endif

// change_style tries to update the style given using a command-line option,
// which must start with 1 or 2 dashes, followed by a style-name or an alias
// for one; returns false, leaving the style untouched, when that fails
bool change_style(const char* arg, slice* style) {
    // style-changing options must have 1 or 2 leading dashes
    if (arg[0] != '-') {
        return false;
    }

    // skip up to 2 leading dashes
    const char* s = arg + (arg[1] == '-' ? 2 : 1);

    // resolve style-name aliases
    const size_t n = sizeof(style_names_aliases) / sizeof(style_names_aliases[0]);
    for (size_t i = 0; i < n; i += 2) {
        if (strcmp(s, style_names_aliases[i]) == 0) {
            s = style_names_aliases[i + 1];
            break;
        }
    }

    // try to find ANSI-code for the style-name given
    for (size_t i = 0; i < sizeof(styles) / sizeof(styles[0]); i += 2) {
        if (strcmp(s, styles[i]) == 0) {
            // styles are only ever read through the slice
            style->ptr = (unsigned char*)styles[i + 1];
            style->len = strlen(styles[i + 1]);
            return true;
        }
    }

    return false;
}

// run handles all command-line arguments in order: a single dash means
// standard input, other dash-leading arguments change the style for the
// inputs after them, and anything else is a filepath; standard input is
// used when no inputs are given; run returns the number of errors
int run(int argc, char** argv, FILE* w, slice* line) {
    size_t files = 0;
    size_t errors = 0;

    handler_args args;
    args.w = w;
    args.line = line;
    args.style.ptr = default_digits_style;
    args.style.len = strlen((char*)default_digits_style);
    // don't leave any field uninitialized, since the struct is copied around
    args.style.cap = args.style.len;

    // stop handling inputs as soon as the output stream is known to fail
    for (size_t i = 1; i < (size_t)argc && !ferror(w); i++) {
        const char* arg = argv[i];

        // `-` means standard input
        if (arg[0] == '-' && arg[1] == 0) {
            if (!handle_lines(args, stdin)) {
                errors++;
            }
            files++;
            continue;
        }

        if (arg[0] == '-') {
            if (!change_style(arg, &args.style)) {
                const char* fmt = "\x1b[31munsupported style named %s\x1b[0m\n";
                fprintf(stderr, fmt, arg);
                errors++;
            }
            continue;
        }

        if (!handle_file(args, arg)) {
            errors++;
        }
        files++;
    }

    // use stdin when not given any filepaths
    if (files == 0) {
        if (!handle_lines(args, stdin)) {
            errors++;
        }
    }

    return (int)errors;
}

// is_help_option simplifies control-flow for func main
bool is_help_option(char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
    );
}

// main shows the help message when the first argument asks for it; otherwise
// it restyles all inputs to the standard output, and its exit code is 0 only
// when no errors happened
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && is_help_option(argv[1])) {
        puts(info);
        return 0;
    }

    slice line;
    init_slice(&line, 32 * 1024);
    if (line.ptr == NULL) {
        // the message isn't a format string, so emit it verbatim
        fputs(line_memory_error_msg, stderr);
        return 1;
    }

    const int res = run(argc, argv, stdout, &line) == 0 ? 0 : 1;
    // the buffer may have been moved by the line-reader, which keeps the
    // slice up to date
    free(line.ptr);
    return res;
}