File: plain.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./plain ./plain.c
  29 */
  30 
  31 #include <ctype.h>
  32 #include <stdbool.h>
  33 #include <stddef.h>
  34 #include <stdint.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 #include <unistd.h>
  39 
  40 #ifdef _WIN32
  41 #include <fcntl.h>
  42 #include <windows.h>
  43 #endif
  44 
  45 #ifdef RED_ERRORS
  46 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  47 #ifdef __APPLE__
  48 #define ERROR_STYLE "\x1b[31m"
  49 #endif
  50 #define RESET_STYLE "\x1b[0m"
  51 #else
  52 #define ERROR_STYLE
  53 #define RESET_STYLE
  54 #endif
  55 
  56 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  57 
  58 #define BAD_ALLOC 2
  59 
  60 const char* info = ""
  61 "plain [options...] [filepaths...]\n"
  62 "\n"
  63 "Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
  64 "be UTF-8. When not given any filepaths, the standard input is used.\n"
  65 "\n"
  66 "Options, all of which can start with either 1 or 2 dashes:\n"
  67 "\n"
  68 "  -h          show this help message\n"
  69 "  -help       show this help message\n"
  70 "";
  71 
// span is a region of bytes in memory, described by where it starts and how
// long it is; nothing in the type itself says who owns those bytes
typedef struct span {
    // ptr is the starting place of the region: it points to the first byte
    unsigned char* ptr;

    // len is how many bytes are in the region, counting from ptr
    size_t len;
} span;
  80 
  81 // advance updates a span so it starts after the number of bytes given
  82 static inline void advance(span* src, size_t n) {
  83     src->ptr += n;
  84     src->len -= n;
  85 }
  86 
// slice is a growable region of bytes in memory; in this file it's the
// line buffer handed to getline, which can reallocate it and update both
// fields as needed
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // cap is how many bytes the memory region has available, which isn't the
    // same as how many of those bytes hold meaningful data
    size_t cap;
} slice;
  95 
  96 // find_esc_pair tries to find the starting index of either 2-byte substrings
  97 // "\x1b[" or "\x1b]"
  98 static inline int64_t find_esc_pair(span line, size_t start) {
  99     bool esc = false;
 100 
 101     for (size_t i = start; i < line.len; i++) {
 102         unsigned char cur = line.ptr[i];
 103 
 104         if (cur == '\x1b') {
 105             esc = true;
 106             continue;
 107         }
 108 
 109         if (esc && (cur == '[' || cur == ']')) {
 110             return i - 1;
 111         }
 112 
 113         esc = false;
 114     }
 115 
 116     return -1;
 117 }
 118 
 119 // find_alpha tries to find the position of the first letter in a string
 120 static inline int64_t find_alpha(span line) {
 121     for (size_t i = 0; i < line.len; i++) {
 122         if (isalpha(line.ptr[i])) {
 123             return i;
 124         }
 125     }
 126     return -1;
 127 }
 128 
 129 // find_byte tries to find the first position of the value given in a string
 130 static inline int64_t find_byte(span line, unsigned char what) {
 131     for (size_t i = 0; i < line.len; i++) {
 132         if (line.ptr[i] == what) {
 133             return i;
 134         }
 135     }
 136     return -1;
 137 }
 138 
 139 // find_osc_end tries to find the position where an OSC sequence ends
 140 int64_t find_osc_end(span line) {
 141     unsigned char prev = 0;
 142 
 143     for (size_t i = 0; i < line.len; i++) {
 144         const unsigned char cur = line.ptr[i];
 145         if (prev == '\x1b' && cur == '\\') {
 146             return i;
 147         }
 148         prev = cur;
 149     }
 150 
 151     return -1;
 152 }
 153 
 154 // destyle_line renders the line given, omitting ANSI-styles
 155 void destyle_line(FILE* w, span line) {
 156     while (line.len > 0) {
 157         int64_t j = find_esc_pair(line, 0);
 158         if (j < 0) {
 159             fwrite(line.ptr, 1, line.len, w);
 160             return;
 161         }
 162 
 163         fwrite(line.ptr, 1, j, w);
 164         advance(&line, j);
 165 
 166         if (line.ptr[1] == '[') {
 167             j = find_alpha(line);
 168             if (j < 0) {
 169                 return;
 170             }
 171             advance(&line, j + 1);
 172             continue;
 173         }
 174 
 175         j = find_osc_end(line);
 176         if (j < 0) {
 177             return;
 178         }
 179         advance(&line, j + 1);
 180     }
 181 }
 182 
 183 bool starts_with_bom(span s) {
 184     const unsigned char* p = s.ptr;
 185     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 186 }
 187 
 188 // handle_lines loops over input lines, restyling all digit-runs as more
 189 // readable `nice numbers`, fulfilling the app's purpose
 190 void handle_lines(FILE* w, slice* line, FILE* src, bool live_lines) {
 191     span trimmed;
 192 
 193     for (size_t i = 0; !feof(w); i++) {
 194         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 195         if (line->ptr == NULL) {
 196             fprintf(stderr, "\n");
 197             fprintf(stderr, ERROR_LINE("out of memory"));
 198             exit(BAD_ALLOC);
 199         }
 200 
 201         if (len < 0) {
 202             break;
 203         }
 204 
 205         trimmed.ptr = line->ptr;
 206         trimmed.len = len;
 207 
 208         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 209         if (i == 0 && starts_with_bom(trimmed)) {
 210             trimmed.ptr += 3;
 211             trimmed.len -= 3;
 212             len = trimmed.len;
 213         }
 214 
 215         const unsigned char* p = trimmed.ptr;
 216         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 217         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 218             trimmed.len -= 2;
 219         } else if (len >= 1 && p[len - 1] == '\n') {
 220             trimmed.len--;
 221         }
 222 
 223         destyle_line(w, trimmed);
 224         fputc('\n', w);
 225     }
 226 
 227     if (!live_lines) {
 228         fflush(w);
 229     }
 230 }
 231 
 232 // handle_file handles data from the filename given; returns false only when
 233 // the file can't be opened
 234 bool handle_file(FILE* w, slice* line, const char* path, bool live_lines) {
 235     FILE* f = fopen(path, "rb");
 236     if (f == NULL) {
 237         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 238         return false;
 239     }
 240 
 241     handle_lines(w, line, f, live_lines);
 242     fclose(f);
 243     return true;
 244 }
 245 
 246 // run returns the number of errors
 247 int run(char** args, size_t nargs, FILE* w, bool live_lines) {
 248     size_t dashes = 0;
 249     for (int i = 0; i < nargs; i++) {
 250         if (strcmp(args[i], "-") == 0) {
 251             dashes++;
 252         }
 253     }
 254 
 255     if (dashes > 1) {
 256         const char* m = "can't use the standard input (dash) more than once";
 257         fprintf(stderr, ERROR_LINE("%s"), m);
 258         return 1;
 259     }
 260 
 261     size_t errors = 0;
 262 
 263     slice line;
 264     line.cap = 32 * 1024;
 265     line.ptr = malloc(line.cap);
 266 
 267     if (line.ptr == NULL) {
 268         fprintf(stderr, ERROR_LINE("out of memory"));
 269         exit(BAD_ALLOC);
 270     }
 271 
 272     for (size_t i = 0; i < nargs && !feof(w); i++) {
 273         if (strcmp(args[i], "-") == 0) {
 274             // `-` means standard input
 275             handle_lines(w, &line, stdin, live_lines);
 276             continue;
 277         }
 278 
 279         if (!handle_file(w, &line, args[i], live_lines)) {
 280             errors++;
 281         }
 282     }
 283 
 284     // use stdin when not given any filepaths
 285     if (nargs == 0) {
 286         handle_lines(w, &line, stdin, live_lines);
 287     }
 288 
 289     if (!live_lines) {
 290         fflush(w);
 291     }
 292     free(line.ptr);
 293     return errors;
 294 }
 295 
 296 int main(int argc, char** argv) {
 297 #ifdef _WIN32
 298     setmode(fileno(stdin), O_BINARY);
 299     // ensure output lines end in LF instead of CRLF on windows
 300     setmode(fileno(stdout), O_BINARY);
 301     setmode(fileno(stderr), O_BINARY);
 302 #endif
 303 
 304     if (argc > 1) {
 305         if (
 306             strcmp(argv[1], "-h") == 0 ||
 307             strcmp(argv[1], "-help") == 0 ||
 308             strcmp(argv[1], "--h") == 0 ||
 309             strcmp(argv[1], "--help") == 0
 310         ) {
 311             fprintf(stdout, "%s", info);
 312             return 0;
 313         }
 314     }
 315 
 316     size_t nargs = argc - 1;
 317     char** args = argv + 1;
 318     bool buffered = false;
 319 
 320     if (nargs > 0) {
 321         if (
 322             strcmp(args[0], "-b") == 0 ||
 323             strcmp(args[0], "--b") == 0 ||
 324             strcmp(args[0], "-buffered") == 0 ||
 325             strcmp(args[0], "--buffered") == 0
 326         ) {
 327             buffered = true;
 328             nargs--;
 329             args++;
 330         }
 331     }
 332 
 333     if (nargs > 0 && strcmp(args[0], "--") == 0) {
 334         nargs--;
 335         args++;
 336     }
 337 
 338     const int fd = fileno(stdout);
 339     const bool live_lines = !buffered && lseek(fd, 0, SEEK_CUR) != 0;
 340     if (live_lines) {
 341         setvbuf(stdout, NULL, _IOLBF, 0);
 342     } else {
 343         setvbuf(stdout, NULL, _IOFBF, 0);
 344     }
 345     return run(args, nargs, stdout, live_lines) == 0 ? 0 : 1;
 346 }