/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./nn ./nn.c
*/

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
// needed only for the setmode/O_BINARY calls in func main
#include <fcntl.h>
#include <io.h>
#endif

/*
The info-message string below was made by running the command

awk 'BEGIN { print "const char* info = \"\"" } { printf "\"%s\\n\"\n", $0 } END { print "\"\";" }' info.txt
*/

// info is the message shown when this app is given any of its help options
const char* info = ""
"nn [options...] [filepaths...]\n"
"\n"
"\n"
"Nice Numbers is an app which renders the plain text it's given to make long\n"
"numbers much easier to read, by alternating 3-digit groups which are colored\n"
"using ANSI-codes with unstyled ones.\n"
"\n"
"Unlike the common practice of inserting commas between 3-digit groups, this\n"
"alternative doesn't widen the original text, keeping any alignments the same.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
" -blue use a blue-like color to alternate-style runs of digits\n"
" -bold use a bold style/effect to alternate-style runs of digits\n"
" -gray use a gray color to alternate-style runs of digits\n"
" -green use a green color to alternate-style runs of digits\n"
" -inverse invert/swap colors to alternate-style runs of digits\n"
" -orange use an orange color to alternate-style runs of digits\n"
" -purple use a purple color to alternate-style runs of digits\n"
" -red use a red color to alternate-style runs of digits\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"\n"
" -highlight same as option -inverse\n"
" -hilite same as option -inverse\n"
"";

// must_realloc (re)allocates a memory region of at least 1 byte, ending the
// app with an error message when that fails, so that callers never have to
// deal with null buffers; a null ptr makes it act as a plain allocation
static void* must_realloc(void* ptr, size_t size) {
    void* res = realloc(ptr, size > 0 ? size : 1);
    if (res == NULL) {
        fputs("\x1b[31mout of memory\x1b[0m\n", stderr);
        exit(1);
    }
    return res;
}

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// new_slice is the constructor for type slice: the result starts empty, with
// room for the number of bytes given
slice new_slice(size_t cap) {
    slice res;
    res.cap = cap;
    res.len = 0;
    res.ptr = must_realloc(NULL, cap);
    return res;
}

// advance updates a slice so it starts after the number of bytes given; it's
// up to the caller to ensure n isn't more than the slice's length; this is
// `static inline` because a plain `inline` definition has no external
// definition to link against when the compiler chooses not to inline calls
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// first creates a slice ending at the number of bytes given
slice first(slice src, size_t n) {
    src.len = n;
    return src;
}

// append_byte does as it says, potentially reallocating the memory area
// backing the slice given
void append_byte(slice* s, unsigned char b) {
    if (s->len >= s->cap) {
        // slice is full, so double its capacity: empty slices start with
        // 1 byte, and doubling is clamped to avoid wrapping around
        size_t cap = 1;
        if (s->cap > 0) {
            cap = s->cap <= SIZE_MAX / 2 ? 2 * s->cap : SIZE_MAX;
        }
        s->ptr = must_realloc(s->ptr, cap);
        s->cap = cap;
    }

    s->ptr[s->len] = b;
    s->len++;
}

// find_lf returns the index of the first line-feed found, or a negative value
// on failure
long long int find_lf(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        if (s.ptr[i] == '\n') {
            return (long long int)i;
        }
    }
    return -1;
}

// find_digit returns the index of the first digit found, or a negative value
// on failure
long long int find_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if ('0' <= b && b <= '9') {
            return (long long int)i;
        }
    }
    return -1;
}

// find_non_digit returns the index of the first non-digit found, or a negative
// value on failure
long long int find_non_digit(slice s) {
    for (size_t i = 0; i < s.len; i++) {
        const unsigned char b = s.ptr[i];
        if (b < '0' || b > '9') {
            return (long long int)i;
        }
    }
    return -1;
}

// reset_style is the ANSI-code which ends any styles currently in effect
const unsigned char reset_style[] = "\x1b[0m";

// bufreader is a way to speed up reading data by reducing the frequency of
// data reads from a data source, while still allowing reading 1 byte at
// a time
typedef struct bufreader {
    // buf is the buffer, (re)filled periodically as needed
    unsigned char* buf;

    // len is how many buffer bytes are being used, out of its max capacity
    size_t len;

    // cap is the buffer's capacity, or the most bytes it can hold at once
    size_t cap;

    // pos is the current position, up to the current buffer length
    size_t pos;

    // src is the data source used to fill the buffer
    FILE* src;
} bufreader;

// new_bufreader is the constructor for type bufreader
bufreader new_bufreader(FILE* src, size_t cap) {
    bufreader res;
    res.cap = cap;
    res.len = 0;
    res.pos = 0;
    res.src = src;
    res.buf = must_realloc(NULL, cap);
    return res;
}

// close_bufreader deallocates the buffer: the data source is left open
void close_bufreader(bufreader* r) {
    free(r->buf);
    r->buf = NULL;
    r->len = 0;
}

// read_byte does as it says: check its return for the value EOF, before
// using it as the next byte
int read_byte(bufreader* r) {
    if (r->pos >= r->len) {
        // current chunk is over, so try to read the next one
        r->pos = 0;
        r->len = fread(r->buf, sizeof(unsigned char), r->cap, r->src);
        if (r->len == 0) {
            // reached the end of data
            return EOF;
        }
    }

    const unsigned char b = r->buf[r->pos];
    r->pos++;
    return b;
}

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination
    bool done;
} bufwriter;

// new_bufwriter is the constructor for type bufwriter
bufwriter new_bufwriter(FILE* dst, size_t cap) {
    bufwriter res;
    res.cap = cap;
    res.done = false;
    res.len = 0;
    res.out = dst;
    res.buf = must_realloc(NULL, cap);
    return res;
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; a failed write sets the writer's done flag
void flush(bufwriter* w) {
    if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
        w->done = true;
    }
    w->len = 0;
}

// close_bufwriter ensures all output is shown and deallocates the buffer:
// the destination is left open
void close_bufwriter(bufwriter* w) {
    flush(w);
    free(w->buf);
    w->buf = NULL;
}

// write_bytes does as it says, minimizing the number of calls to fwrite
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    if (w->len + len < w->cap) {
        // all bytes fit into buffer
        memcpy(w->buf + w->len, src, len);
        w->len += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);

    // emit all chunks striding beyond/at the buffer's capacity
    for (; len >= w->cap; src += w->cap, len -= w->cap) {
        if (fwrite(src, w->cap, 1, w->out) < 1) {
            w->done = true;
            return;
        }
    }

    // now all, if any, remaining bytes will fit into the buffer, which
    // is empty after the flush above
    memcpy(w->buf, src, len);
    w->len += len;
}

// write_byte does as it says
void write_byte(bufwriter* w, unsigned char b) {
    if (w->len >= w->cap) {
        flush(w);
    }
    w->buf[w->len] = b;
    w->len++;
}

// restyle_digits renders a run of digits as alternating styled/unstyled runs
// of 3 digits, which greatly improves readability, and is the only purpose
// of this app; string is assumed to be all decimal digits, while the style
// is assumed to be a null-terminated ANSI-code
void restyle_digits(bufwriter* w, slice digits, const unsigned char* style) {
    if (digits.len < 4) {
        // digit sequence is short, so emit it as is
        write_bytes(w, digits.ptr, digits.len);
        return;
    }

    // separate leading 0..2 digits which don't align with the 3-digit groups
    const size_t lead = digits.len % 3;
    // emit leading digits unstyled, if there are any
    write_bytes(w, digits.ptr, lead);
    // the rest is guaranteed to have a length which is a multiple of 3
    advance(&digits, lead);

    const size_t style_len = strlen((const char*)style);

    // start with the alternate style, unless there were no leading digits
    bool style_now = lead != 0;

    while (digits.len > 0) {
        if (style_now) {
            write_bytes(w, style, style_len);
            write_bytes(w, digits.ptr, 3);
            write_bytes(w, reset_style, sizeof(reset_style) - 1);
        } else {
            write_bytes(w, digits.ptr, 3);
        }

        advance(&digits, 3);
        // alternate between styled and unstyled 3-digit groups
        style_now = !style_now;
    }
}

// restyle_line renders the line given, using ANSI-styles to make any long
// numbers in it more legible
void restyle_line(bufwriter* w, slice line, const unsigned char* alt_style) {
    while (!w->done && line.len > 0) {
        // indices match the return type of the find funcs, as narrower
        // types could truncate them on some platforms
        const long long int i = find_digit(line);
        if (i < 0) {
            // no (more) digits for sure
            write_bytes(w, line.ptr, line.len);
            return;
        }

        // some ANSI-style sequences use 4-digit numbers, which are long
        // enough for this app to mangle: digit-runs right after the start
        // of an ANSI-sequence are thus emitted as they are
        const unsigned char* p = line.ptr;
        const bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';

        // emit line before current digit-run
        write_bytes(w, line.ptr, (size_t)i);
        advance(&line, (size_t)i);

        // see where the digit-run ends: a negative result means it goes on
        // until the end of the line
        const long long int j = find_non_digit(line);
        const slice digits = first(line, j < 0 ? line.len : (size_t)j);

        // emit styled digit-run... maybe
        if (is_ansi) {
            write_bytes(w, digits.ptr, digits.len);
        } else {
            restyle_digits(w, digits, alt_style);
        }

        if (j < 0) {
            // nothing is left after the digit-run
            return;
        }

        // skip right past the end of the digit-run
        advance(&line, digits.len);
    }
}

// default_digits_style makes it easy to change the built-in default style
const unsigned char default_digits_style[] = "\x1b[38;5;248m";

// buffer_size is trying to be a good value for modern CPU cores
const size_t buffer_size = 32 * 1024;

// handle_reader loops over input lines, restyling all digit-runs as more
// readable `nice numbers`, fulfilling the app's purpose
void handle_reader(bufwriter* w, FILE* src, const unsigned char* style) {
    // prev is the last byte read, and stays 0 when there's no input at all
    unsigned char prev = 0;
    bufreader r = new_bufreader(src, buffer_size);
    slice line = new_slice(buffer_size);

    while (!w->done) {
        const int v = read_byte(&r);
        if (v == EOF) {
            // input is over
            break;
        }

        // still more bytes to go: lines keep their trailing line-feeds
        prev = (unsigned char)v;
        append_byte(&line, prev);

        if (prev == '\n') {
            // end of line
            restyle_line(w, line, style);
            line.len = 0;
        }
    }

    // don't forget the last line
    restyle_line(w, line, style);

    // ensure last output line ends with a line-feed since, at least on
    // msys/windows, `less` hangs when lines with millions of symbols
    // don't end with a lf; note that an empty input also results in a
    // lone line-feed
    if (prev != '\n') {
        write_byte(w, '\n');
    }

    close_bufreader(&r);
    free(line.ptr);
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(bufwriter* w, char* fname, const unsigned char* style) {
    FILE* f = fopen(fname, "rb");
    if (f == NULL) {
        // ensure currently-buffered/deferred output shows up right now: not
        // doing so may scramble results in the common case where stdout and
        // stderr are the same, thus confusing users
        flush(w);

        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
        return false;
    }

    handle_reader(w, f, style);
    fclose(f);
    return true;
}

// run returns the number of errors, which are the files it couldn't open;
// a leading argument is taken as a style option only when it matches one of
// the known style names: anything else is handled as a filepath
size_t run(int argc, char** argv) {
    // styles pairs all style-option names, without their leading dashes,
    // with the ANSI-codes they stand for
    static const struct {
        const char* name;
        const char* style;
    } styles[] = {
        {"blue", "\x1b[38;5;26m"},
        {"bold", "\x1b[1m"},
        {"green", "\x1b[38;5;29m"},
        {"gray", "\x1b[38;5;248m"},
        {"highlight", "\x1b[7m"},
        {"hilite", "\x1b[7m"},
        {"inverse", "\x1b[7m"},
        {"invert", "\x1b[7m"},
        {"orange", "\x1b[38;5;166m"},
        {"purple", "\x1b[38;5;99m"},
        {"red", "\x1b[31m"},
    };

    const char* style = (const char*)default_digits_style;
    bufwriter w = new_bufwriter(stdout, buffer_size);

    // handle leading options to change the ANSI-style used
    size_t start = 1;
    if (argc > 1 && argv[start][0] == '-') {
        // options can start with either 1 or 2 dashes
        const char* s = argv[start] + (argv[start][1] == '-' ? 2 : 1);

        for (size_t i = 0; i < sizeof(styles) / sizeof(styles[0]); i++) {
            if (strcmp(s, styles[i].name) == 0) {
                style = styles[i].style;
                start++;
                break;
            }
        }
    }

    const unsigned char* alt_style = (const unsigned char*)style;

    // use stdin when not given any filepaths
    if ((size_t)argc <= start) {
        handle_reader(&w, stdin, alt_style);
        close_bufwriter(&w);
        return 0;
    }

    size_t errors = 0;
    for (size_t i = start; i < (size_t)argc && !w.done; i++) {
        if (i > start) {
            // put an extra empty line between adjacent outputs
            write_byte(&w, '\n');
        }

        if (!handle_file(&w, argv[i], alt_style)) {
            errors++;
        }
    }

    close_bufwriter(&w);
    return errors;
}

// main shows the help message when the first argument is a help option, or
// lets func run do the work; the exit code is 0 only when no errors happened
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && argv[1][0] == '-') {
        const char* s = argv[1] + (argv[1][1] == '-' ? 2 : 1);
        if (strcmp(s, "h") == 0 || strcmp(s, "help") == 0) {
            puts(info);
            return 0;
        }
    }

    // disable automatic stdio buffering, in favor of explicit buffering
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    return run(argc, argv) == 0 ? 0 : 1;
}