File: plain.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./plain ./plain.c
  29 */
  30 
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
  38 
  39 #ifdef _WIN32
  40 #include <fcntl.h>
  41 #include <windows.h>
  42 #endif
  43 
  44 #ifdef RED_ERRORS
  45 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  46 #ifdef __APPLE__
  47 #define ERROR_STYLE "\x1b[31m"
  48 #endif
  49 #define RESET_STYLE "\x1b[0m"
  50 #else
  51 #define ERROR_STYLE
  52 #define RESET_STYLE
  53 #endif
  54 
  55 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  56 
  57 #define BAD_ALLOC 2
  58 
  59 const char* info = ""
  60 "plain [options...] [filepaths...]\n"
  61 "\n"
  62 "Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
  63 "be UTF-8. When not given any filepaths, the standard input is used.\n"
  64 "\n"
  65 "Options, all of which can start with either 1 or 2 dashes:\n"
  66 "\n"
  67 "  -h          show this help message\n"
  68 "  -help       show this help message\n"
  69 "";
  70 
  71 // span is a region of bytes in memory
  72 typedef struct span {
  73     // ptr is the starting place of the region
  74     unsigned char* ptr;
  75 
  76     // len is how many bytes are in the region
  77     size_t len;
  78 } span;
  79 
  80 // advance updates a span so it starts after the number of bytes given
  81 static inline void advance(span* src, size_t n) {
  82     src->ptr += n;
  83     src->len -= n;
  84 }
  85 
  86 // slice is a growable region of bytes in memory
  87 typedef struct slice {
  88     // ptr is the starting place of the region
  89     unsigned char* ptr;
  90 
  91     // cap is how many bytes the memory region has available
  92     size_t cap;
  93 } slice;
  94 
  95 // find_esc_pair tries to find the starting index of either 2-byte substrings
  96 // "\x1b[" or "\x1b]"
  97 static inline int64_t find_esc_pair(span line, size_t start) {
  98     bool esc = false;
  99 
 100     for (size_t i = start; i < line.len; i++) {
 101         unsigned char cur = line.ptr[i];
 102 
 103         if (cur == '\x1b') {
 104             esc = true;
 105             continue;
 106         }
 107 
 108         if (esc && (cur == '[' || cur == ']')) {
 109             return i - 1;
 110         }
 111 
 112         esc = false;
 113     }
 114 
 115     return -1;
 116 }
 117 
 118 // find_alpha tries to find the position of the first letter in a string
 119 static inline int64_t find_alpha(span line) {
 120     for (size_t i = 0; i < line.len; i++) {
 121         if (isalpha(line.ptr[i])) {
 122             return i;
 123         }
 124     }
 125     return -1;
 126 }
 127 
 128 // find_byte tries to find the first position of the value given in a string
 129 static inline int64_t find_byte(span line, unsigned char what) {
 130     for (size_t i = 0; i < line.len; i++) {
 131         if (line.ptr[i] == what) {
 132             return i;
 133         }
 134     }
 135     return -1;
 136 }
 137 
 138 // find_osc_end tries to find the position where an OSC sequence ends
 139 int64_t find_osc_end(span line) {
 140     unsigned char prev = 0;
 141 
 142     for (size_t i = 0; i < line.len; i++) {
 143         const unsigned char cur = line.ptr[i];
 144         if (prev == '\x1b' && cur == '\\') {
 145             return i;
 146         }
 147         prev = cur;
 148     }
 149 
 150     return -1;
 151 }
 152 
 153 // destyle_line renders the line given, omitting ANSI-styles
 154 void destyle_line(FILE* w, span line) {
 155     while (line.len > 0) {
 156         int64_t j = find_esc_pair(line, 0);
 157         if (j < 0) {
 158             fwrite(line.ptr, 1, line.len, w);
 159             return;
 160         }
 161 
 162         fwrite(line.ptr, 1, j, w);
 163         advance(&line, j);
 164 
 165         if (line.ptr[1] == '[') {
 166             j = find_alpha(line);
 167             if (j < 0) {
 168                 return;
 169             }
 170             advance(&line, j + 1);
 171             continue;
 172         }
 173 
 174         j = find_osc_end(line);
 175         if (j < 0) {
 176             return;
 177         }
 178         advance(&line, j + 1);
 179     }
 180 }
 181 
 182 bool starts_with_bom(span s) {
 183     const unsigned char* p = s.ptr;
 184     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 185 }
 186 
 187 // handle_lines loops over input lines, restyling all digit-runs as more
 188 // readable `nice numbers`, fulfilling the app's purpose
 189 void handle_lines(FILE* w, slice* line, FILE* src, bool live_lines) {
 190     span trimmed;
 191 
 192     for (size_t i = 0; !feof(w); i++) {
 193         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 194         if (line->ptr == NULL) {
 195             fprintf(stderr, "\n");
 196             fprintf(stderr, ERROR_LINE("out of memory"));
 197             exit(BAD_ALLOC);
 198         }
 199 
 200         if (len < 0) {
 201             break;
 202         }
 203 
 204         trimmed.ptr = line->ptr;
 205         trimmed.len = len;
 206 
 207         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 208         if (i == 0 && starts_with_bom(trimmed)) {
 209             trimmed.ptr += 3;
 210             trimmed.len -= 3;
 211             len = trimmed.len;
 212         }
 213 
 214         const unsigned char* p = trimmed.ptr;
 215         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 216         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 217             trimmed.len -= 2;
 218         } else if (len >= 1 && p[len - 1] == '\n') {
 219             trimmed.len--;
 220         }
 221 
 222         destyle_line(w, trimmed);
 223         fputc('\n', w);
 224         if (live_lines) {
 225             fflush(w);
 226         }
 227     }
 228 }
 229 
 230 // handle_file handles data from the filename given; returns false only when
 231 // the file can't be opened
 232 bool handle_file(FILE* w, slice* line, const char* path, bool live_lines) {
 233     FILE* f = fopen(path, "rb");
 234     if (f == NULL) {
 235         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 236         return false;
 237     }
 238 
 239     handle_lines(w, line, f, live_lines);
 240     fclose(f);
 241     return true;
 242 }
 243 
 244 // run returns the number of errors
 245 int run(int argc, char** argv, FILE* w, bool live_lines) {
 246     size_t errors = 0;
 247 
 248     slice line;
 249     line.cap = 32 * 1024;
 250     line.ptr = malloc(line.cap);
 251 
 252     if (line.ptr == NULL) {
 253         fprintf(stderr, ERROR_LINE("out of memory"));
 254         exit(BAD_ALLOC);
 255     }
 256 
 257     for (size_t i = 1; i < (size_t)argc && !feof(w); i++) {
 258         if (strcmp(argv[i], "-") == 0) {
 259             // `-` means standard input
 260             handle_lines(w, &line, stdin, live_lines);
 261             continue;
 262         }
 263 
 264         if (!handle_file(w, &line, argv[i], live_lines)) {
 265             errors++;
 266         }
 267     }
 268 
 269     // use stdin when not given any filepaths
 270     if (argc < 2) {
 271         handle_lines(w, &line, stdin, live_lines);
 272     }
 273 
 274     if (!live_lines) {
 275         fflush(w);
 276     }
 277     free(line.ptr);
 278     return errors;
 279 }
 280 
 281 // is_help_option simplifies control-flow for func main
 282 bool is_help_option(const char* s) {
 283     return (s[0] == '-') && (
 284         strcmp(s, "-h") == 0 ||
 285         strcmp(s, "-help") == 0 ||
 286         strcmp(s, "--h") == 0 ||
 287         strcmp(s, "--help") == 0
 288     );
 289 }
 290 
 291 int main(int argc, char** argv) {
 292 #ifdef _WIN32
 293     setmode(fileno(stdin), O_BINARY);
 294     // ensure output lines end in LF instead of CRLF on windows
 295     setmode(fileno(stdout), O_BINARY);
 296     setmode(fileno(stderr), O_BINARY);
 297 #endif
 298 
 299     // handle any of the help options, if given
 300     if (argc > 1 && is_help_option(argv[1])) {
 301         printf("%s", info);
 302         return 0;
 303     }
 304 
 305     const bool live_lines = lseek(fileno(stdout), 0, SEEK_CUR) != 0;
 306     if (!live_lines) {
 307         setvbuf(stdout, NULL, _IOFBF, 0);
 308     }
 309     return run(argc, argv, stdout, live_lines) == 0 ? 0 : 1;
 310 }