File: plain.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./plain ./plain.c
  29 */
  30 
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  37 
  38 #ifdef _WIN32
  39 #include <fcntl.h>
  40 #include <windows.h>
  41 #endif
  42 
  43 #ifdef RED_ERRORS
  44 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  45 #ifdef __APPLE__
  46 #define ERROR_STYLE "\x1b[31m"
  47 #endif
  48 #define RESET_STYLE "\x1b[0m"
  49 #else
  50 #define ERROR_STYLE
  51 #define RESET_STYLE
  52 #endif
  53 
  54 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  55 
  56 const char* info = ""
  57 "plain [options...] [filepaths...]\n"
  58 "\n"
  59 "Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
  60 "be UTF-8. When not given any filepaths, the standard input is used.\n"
  61 "\n"
  62 "Options, all of which can start with either 1 or 2 dashes:\n"
  63 "\n"
  64 "  -h          show this help message\n"
  65 "  -help       show this help message\n"
  66 "";
  67 
  68 // span is a region of bytes in memory
  69 typedef struct span {
  70     // ptr is the starting place of the region
  71     unsigned char* ptr;
  72 
  73     // len is how many bytes are in the region
  74     size_t len;
  75 } span;
  76 
  77 // advance updates a span so it starts after the number of bytes given
  78 static inline void advance(span* src, size_t n) {
  79     src->ptr += n;
  80     src->len -= n;
  81 }
  82 
  83 // slice is a growable region of bytes in memory
  84 typedef struct slice {
  85     // ptr is the starting place of the region
  86     unsigned char* ptr;
  87 
  88     // len is how many bytes are currently being used
  89     size_t len;
  90 
  91     // cap is how many bytes the memory region has available
  92     size_t cap;
  93 } slice;
  94 
  95 // find_esc_pair tries to find the starting index of 2-byte substring "\x1b["
  96 static inline int64_t find_esc_pair(span line, size_t start) {
  97     bool esc = false;
  98 
  99     for (size_t i = start; i < line.len; i++) {
 100         unsigned char cur = line.ptr[i];
 101 
 102         if (cur == '\x1b') {
 103             esc = true;
 104             continue;
 105         }
 106 
 107         if (esc && cur == '[') {
 108             return i - 1;
 109         }
 110 
 111         esc = false;
 112     }
 113 
 114     return -1;
 115 }
 116 
 117 // find_alpha tries to find the position of the first letter in a string
 118 static inline int64_t find_alpha(span line) {
 119     for (size_t i = 0; i < line.len; i++) {
 120         if (isalpha(line.ptr[i])) {
 121             return i;
 122         }
 123     }
 124     return -1;
 125 }
 126 
 127 // find_byte tries to find the first position of the value given in a string
 128 static inline int64_t find_byte(span line, unsigned char what) {
 129     for (size_t i = 0; i < line.len; i++) {
 130         if (line.ptr[i] == what) {
 131             return i;
 132         }
 133     }
 134     return -1;
 135 }
 136 
 137 // find_osc_end tries to find the first position after the end of OSC bytes
 138 int64_t find_osc_end(span line) {
 139     size_t prev = 0;
 140     for (size_t i = 0; i < line.len; i++) {
 141         if (line.ptr[i] == '\a') {
 142             return i;
 143         }
 144         if (prev == '\x1b' && line.ptr[i] == '\\') {
 145             return i;
 146         }
 147         prev = line.ptr[i];
 148     }
 149     return -1;
 150 }
 151 
 152 // destyle_line renders the line given, omitting ANSI-styles
 153 void destyle_line(FILE* w, span line) {
 154     while (line.len > 0) {
 155         int64_t j = find_esc_pair(line, 0);
 156         if (j < 0) {
 157             fwrite(line.ptr, 1, line.len, w);
 158             return;
 159         }
 160 
 161         fwrite(line.ptr, 1, j, w);
 162         advance(&line, j);
 163 
 164         j = find_alpha(line);
 165         if (j < 0) {
 166             fwrite(line.ptr, 1, line.len, w);
 167             return;
 168         }
 169         advance(&line, j + 1);
 170     }
 171 }
 172 
 173 bool starts_with_bom(span s) {
 174     const unsigned char* p = s.ptr;
 175     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 176 }
 177 
 178 // handle_lines loops over input lines, restyling all digit-runs as more
 179 // readable `nice numbers`, fulfilling the app's purpose
 180 bool handle_lines(FILE* w, slice* line, FILE* src) {
 181     span trimmed;
 182 
 183     for (size_t i = 0; !feof(w); i++) {
 184         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 185         if (line->ptr == NULL) {
 186             fprintf(stderr, ERROR_LINE("out of memory"));
 187             return false;
 188         }
 189 
 190         if (len < 0) {
 191             break;
 192         }
 193 
 194         line->len = len;
 195         trimmed.ptr = line->ptr;
 196         trimmed.len = line->len;
 197 
 198         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 199         if (i == 0 && starts_with_bom(trimmed)) {
 200             trimmed.ptr += 3;
 201             trimmed.len -= 3;
 202             len = trimmed.len;
 203         }
 204 
 205         const unsigned char* p = trimmed.ptr;
 206         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 207         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 208             trimmed.len -= 2;
 209         } else if (len >= 1 && p[len - 1] == '\n') {
 210             trimmed.len--;
 211         }
 212 
 213         destyle_line(w, trimmed);
 214         fputc('\n', w);
 215         fflush(w);
 216     }
 217 
 218     return true;
 219 }
 220 
 221 // handle_file handles data from the filename given; returns false only when
 222 // the file can't be opened
 223 bool handle_file(FILE* w, slice* line, const char* path) {
 224     FILE* f = fopen(path, "rb");
 225     if (f == NULL) {
 226         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 227         return false;
 228     }
 229 
 230     const bool ok = handle_lines(w, line, f);
 231     fclose(f);
 232     return ok;
 233 }
 234 
 235 // run returns the number of errors
 236 int run(int argc, char** argv, FILE* w) {
 237     size_t errors = 0;
 238 
 239     slice line;
 240     line.len = 0;
 241     line.cap = 32 * 1024;
 242     line.ptr = malloc(line.cap);
 243 
 244     if (line.ptr == NULL) {
 245         fprintf(stderr, ERROR_LINE("out of memory"));
 246         return 1;
 247     }
 248 
 249     for (size_t i = 1; i < (size_t)argc && !feof(w) && line.ptr != NULL; i++) {
 250         if (argv[i][0] == '-' && argv[i][1] == 0) {
 251             // `-` means standard input
 252             if (!handle_lines(w, &line, stdin)) {
 253                 errors++;
 254             }
 255             continue;
 256         }
 257 
 258         if (!handle_file(w, &line, argv[i])) {
 259             errors++;
 260         }
 261     }
 262 
 263     // use stdin when not given any filepaths
 264     if (argc < 2) {
 265         if (!handle_lines(w, &line, stdin)) {
 266             errors++;
 267         }
 268     }
 269 
 270     free(line.ptr);
 271     return errors;
 272 }
 273 
 274 // is_help_option simplifies control-flow for func main
 275 bool is_help_option(const char* s) {
 276     return (s[0] == '-') && (
 277         strcmp(s, "-h") == 0 ||
 278         strcmp(s, "-help") == 0 ||
 279         strcmp(s, "--h") == 0 ||
 280         strcmp(s, "--help") == 0
 281     );
 282 }
 283 
 284 int main(int argc, char** argv) {
 285 #ifdef _WIN32
 286     setmode(fileno(stdin), O_BINARY);
 287     // ensure output lines end in LF instead of CRLF on windows
 288     setmode(fileno(stdout), O_BINARY);
 289     setmode(fileno(stderr), O_BINARY);
 290 #endif
 291 
 292     // handle any of the help options, if given
 293     if (argc > 1 && is_help_option(argv[1])) {
 294         printf("%s", info);
 295         return 0;
 296     }
 297 
 298     return run(argc, argv, stdout) == 0 ? 0 : 1;
 299 }