File: plain.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./plain ./plain.c
  29 */
  30 
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
  38 
  39 #ifdef _WIN32
  40 #include <fcntl.h>
  41 #include <windows.h>
  42 #endif
  43 
  44 #ifdef RED_ERRORS
  45 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  46 #ifdef __APPLE__
  47 #define ERROR_STYLE "\x1b[31m"
  48 #endif
  49 #define RESET_STYLE "\x1b[0m"
  50 #else
  51 #define ERROR_STYLE
  52 #define RESET_STYLE
  53 #endif
  54 
  55 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  56 
  57 #define BAD_ALLOC 2
  58 
  59 const char* info = ""
  60 "plain [options...] [filepaths...]\n"
  61 "\n"
  62 "Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
  63 "be UTF-8. When not given any filepaths, the standard input is used.\n"
  64 "\n"
  65 "Options, all of which can start with either 1 or 2 dashes:\n"
  66 "\n"
  67 "  -h          show this help message\n"
  68 "  -help       show this help message\n"
  69 "";
  70 
  71 // span is a region of bytes in memory
  72 typedef struct span {
  73     // ptr is the starting place of the region
  74     unsigned char* ptr;
  75 
  76     // len is how many bytes are in the region
  77     size_t len;
  78 } span;
  79 
  80 // advance updates a span so it starts after the number of bytes given
  81 static inline void advance(span* src, size_t n) {
  82     src->ptr += n;
  83     src->len -= n;
  84 }
  85 
  86 // slice is a growable region of bytes in memory
  87 typedef struct slice {
  88     // ptr is the starting place of the region
  89     unsigned char* ptr;
  90 
  91     // cap is how many bytes the memory region has available
  92     size_t cap;
  93 } slice;
  94 
  95 // find_esc_pair tries to find the starting index of either 2-byte substrings
  96 // "\x1b[" or "\x1b]"
  97 static inline int64_t find_esc_pair(span line, size_t start) {
  98     bool esc = false;
  99 
 100     for (size_t i = start; i < line.len; i++) {
 101         unsigned char cur = line.ptr[i];
 102 
 103         if (cur == '\x1b') {
 104             esc = true;
 105             continue;
 106         }
 107 
 108         if (esc && (cur == '[' || cur == ']')) {
 109             return i - 1;
 110         }
 111 
 112         esc = false;
 113     }
 114 
 115     return -1;
 116 }
 117 
 118 // find_alpha tries to find the position of the first letter in a string
 119 static inline int64_t find_alpha(span line) {
 120     for (size_t i = 0; i < line.len; i++) {
 121         if (isalpha(line.ptr[i])) {
 122             return i;
 123         }
 124     }
 125     return -1;
 126 }
 127 
 128 // find_byte tries to find the first position of the value given in a string
 129 static inline int64_t find_byte(span line, unsigned char what) {
 130     for (size_t i = 0; i < line.len; i++) {
 131         if (line.ptr[i] == what) {
 132             return i;
 133         }
 134     }
 135     return -1;
 136 }
 137 
 138 // find_osc_end tries to find the position where an OSC sequence ends
 139 int64_t find_osc_end(span line) {
 140     unsigned char prev = 0;
 141 
 142     for (size_t i = 0; i < line.len; i++) {
 143         const unsigned char cur = line.ptr[i];
 144         if (prev == '\x1b' && cur == '\\') {
 145             return i;
 146         }
 147         prev = cur;
 148     }
 149 
 150     return -1;
 151 }
 152 
 153 // destyle_line renders the line given, omitting ANSI-styles
 154 void destyle_line(FILE* w, span line) {
 155     while (line.len > 0) {
 156         int64_t j = find_esc_pair(line, 0);
 157         if (j < 0) {
 158             fwrite(line.ptr, 1, line.len, w);
 159             return;
 160         }
 161 
 162         fwrite(line.ptr, 1, j, w);
 163         advance(&line, j);
 164 
 165         if (line.ptr[1] == '[') {
 166             j = find_alpha(line);
 167             if (j < 0) {
 168                 return;
 169             }
 170             advance(&line, j + 1);
 171             continue;
 172         }
 173 
 174         j = find_osc_end(line);
 175         if (j < 0) {
 176             return;
 177         }
 178         advance(&line, j + 1);
 179     }
 180 }
 181 
 182 bool starts_with_bom(span s) {
 183     const unsigned char* p = s.ptr;
 184     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 185 }
 186 
 187 // handle_lines loops over input lines, restyling all digit-runs as more
 188 // readable `nice numbers`, fulfilling the app's purpose
 189 void handle_lines(FILE* w, slice* line, FILE* src, bool live_lines) {
 190     span trimmed;
 191 
 192     for (size_t i = 0; !feof(w); i++) {
 193         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 194         if (line->ptr == NULL) {
 195             fprintf(stderr, "\n");
 196             fprintf(stderr, ERROR_LINE("out of memory"));
 197             exit(BAD_ALLOC);
 198         }
 199 
 200         if (len < 0) {
 201             break;
 202         }
 203 
 204         trimmed.ptr = line->ptr;
 205         trimmed.len = len;
 206 
 207         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 208         if (i == 0 && starts_with_bom(trimmed)) {
 209             trimmed.ptr += 3;
 210             trimmed.len -= 3;
 211             len = trimmed.len;
 212         }
 213 
 214         const unsigned char* p = trimmed.ptr;
 215         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 216         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 217             trimmed.len -= 2;
 218         } else if (len >= 1 && p[len - 1] == '\n') {
 219             trimmed.len--;
 220         }
 221 
 222         destyle_line(w, trimmed);
 223         fputc('\n', w);
 224     }
 225 
 226     if (!live_lines) {
 227         fflush(w);
 228     }
 229 }
 230 
 231 // handle_file handles data from the filename given; returns false only when
 232 // the file can't be opened
 233 bool handle_file(FILE* w, slice* line, const char* path, bool live_lines) {
 234     FILE* f = fopen(path, "rb");
 235     if (f == NULL) {
 236         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 237         return false;
 238     }
 239 
 240     handle_lines(w, line, f, live_lines);
 241     fclose(f);
 242     return true;
 243 }
 244 
 245 // run returns the number of errors
 246 int run(char** args, size_t nargs, FILE* w, bool live_lines) {
 247     size_t dashes = 0;
 248     for (int i = 0; i < nargs; i++) {
 249         if (strcmp(args[i], "-") == 0) {
 250             dashes++;
 251         }
 252     }
 253 
 254     if (dashes > 1) {
 255         const char* m = "can't use the standard input (dash) more than once";
 256         fprintf(stderr, ERROR_LINE("%s"), m);
 257         return 1;
 258     }
 259 
 260     size_t errors = 0;
 261 
 262     slice line;
 263     line.cap = 32 * 1024;
 264     line.ptr = malloc(line.cap);
 265 
 266     if (line.ptr == NULL) {
 267         fprintf(stderr, ERROR_LINE("out of memory"));
 268         exit(BAD_ALLOC);
 269     }
 270 
 271     for (size_t i = 0; i < nargs && !feof(w); i++) {
 272         if (strcmp(args[i], "-") == 0) {
 273             // `-` means standard input
 274             handle_lines(w, &line, stdin, live_lines);
 275             continue;
 276         }
 277 
 278         if (!handle_file(w, &line, args[i], live_lines)) {
 279             errors++;
 280         }
 281     }
 282 
 283     // use stdin when not given any filepaths
 284     if (nargs == 0) {
 285         handle_lines(w, &line, stdin, live_lines);
 286     }
 287 
 288     if (!live_lines) {
 289         fflush(w);
 290     }
 291     free(line.ptr);
 292     return errors;
 293 }
 294 
 295 int main(int argc, char** argv) {
 296 #ifdef _WIN32
 297     setmode(fileno(stdin), O_BINARY);
 298     // ensure output lines end in LF instead of CRLF on windows
 299     setmode(fileno(stdout), O_BINARY);
 300     setmode(fileno(stderr), O_BINARY);
 301 #endif
 302 
 303     if (argc > 1) {
 304         if (
 305             strcmp(argv[1], "-h") == 0 ||
 306             strcmp(argv[1], "-help") == 0 ||
 307             strcmp(argv[1], "--h") == 0 ||
 308             strcmp(argv[1], "--help") == 0
 309         ) {
 310             fprintf(stdout, "%s", info);
 311             return 0;
 312         }
 313     }
 314 
 315     size_t nargs = argc - 1;
 316     char** args = argv + 1;
 317     bool buffered = false;
 318 
 319     if (nargs > 0) {
 320         if (
 321             strcmp(args[0], "-buffered") == 0 ||
 322             strcmp(args[0], "--buffered") == 0
 323         ) {
 324             buffered = true;
 325             nargs--;
 326             args++;
 327         }
 328     }
 329 
 330     if (nargs > 0 && strcmp(args[0], "--") == 0) {
 331         nargs--;
 332         args++;
 333     }
 334 
 335     const int fd = fileno(stdout);
 336     const bool live_lines = !buffered && lseek(fd, 0, SEEK_CUR) != 0;
 337     if (live_lines) {
 338         setvbuf(stdout, NULL, _IOLBF, 0);
 339     } else {
 340         setvbuf(stdout, NULL, _IOFBF, 0);
 341     }
 342     return run(args, nargs, stdout, live_lines) == 0 ? 0 : 1;
 343 }