File: plain.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./plain ./plain.c
  29 */
  30 
#include <ctype.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
  38 
  39 #ifdef _WIN32
  40 #include <windows.h>
  41 #endif
  42 
  43 // info is the message shown when this app is given any of its help options
  44 const char* info = ""
  45 "plain [options...] [filepaths...]\n"
  46 "\n"
  47 "\n"
  48 "Ignore all ANSI codes, leaving just the plain-text.\n"
  49 "\n"
  50 "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
  51 "from the standard input.\n"
  52 "\n"
  53 "\n"
  54 "Options, all of which can start with either 1 or 2 dashes:\n"
  55 "\n"
  56 "\n"
  57 "  -h          show this help message\n"
  58 "  -help       show this help message\n"
  59 "";
  60 
  61 const char* line_memory_error_msg =
  62     ""
  63     "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";
  64 
  65 // slice is a growable region of bytes in memory
  66 typedef struct slice {
  67     // ptr is the starting place of the region
  68     unsigned char* ptr;
  69 
  70     // len is how many bytes are currently being used
  71     size_t len;
  72 
  73     // cap is how many bytes the memory region has available
  74     size_t cap;
  75 } slice;
  76 
  77 // init_slice is the constructor for type slice
  78 void init_slice(slice* s, size_t cap) {
  79     s->ptr = malloc(cap);
  80     s->len = 0;
  81     s->cap = cap;
  82 }
  83 
  84 // advance updates a slice so it starts after the number of bytes given
  85 inline void advance(slice* src, size_t n) {
  86     src->ptr += n;
  87     src->len -= n;
  88 }
  89 
  90 // find_esc_pair tries to find the starting index of 2-byte substrings
  91 // "\x1b[" or "\x1b]", whichever comes first, if at all
  92 int64_t find_esc_pair(slice line, size_t start) {
  93     bool esc = false;
  94 
  95     for (size_t i = start; i < line.len; i++) {
  96         unsigned char cur = line.ptr[i];
  97 
  98         if (cur == '\x1b') {
  99             esc = true;
 100             continue;
 101         }
 102 
 103         if (esc && (cur == '[' || cur == ']')) {
 104             return i - 1;
 105         }
 106 
 107         esc = false;
 108     }
 109 
 110     return -1;
 111 }
 112 
 113 // find_alpha tries to find the position of the first letter in a string
 114 int64_t find_alpha(slice line) {
 115     for (size_t i = 0; i < line.len; i++) {
 116         if (isalpha(line.ptr[i])) {
 117             return i;
 118         }
 119     }
 120     return -1;
 121 }
 122 
 123 // find_byte tries to find the first position of the value given in a string
 124 int64_t find_byte(slice line, unsigned char what) {
 125     for (size_t i = 0; i < line.len; i++) {
 126         if (line.ptr[i] == what) {
 127             return i;
 128         }
 129     }
 130     return -1;
 131 }
 132 
 133 inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
 134     fwrite(src, len, 1, w);
 135 }
 136 
 137 typedef struct skip_state {
 138     bool skip_alpha;
 139     bool skip_bell;
 140 } skip_state;
 141 
 142 // destyle_line renders the line given, omitting ANSI-styles
 143 void destyle_line(FILE* w, slice line, skip_state* state) {
 144     if (state->skip_alpha) {
 145         int64_t j = find_alpha(line);
 146         if (j < 0) {
 147             return;
 148         }
 149         state->skip_alpha = false;
 150         advance(&line, j + 1);
 151     }
 152 
 153     if (state->skip_bell) {
 154         int64_t j = find_byte(line, '\a');
 155         if (j < 0) {
 156             return;
 157         }
 158         state->skip_bell = false;
 159         advance(&line, j + 1);
 160     }
 161 
 162     while (line.len > 0) {
 163         int64_t j = find_esc_pair(line, 0);
 164         if (j < 0) {
 165             write_bytes(w, line.ptr, line.len);
 166             return;
 167         }
 168 
 169         write_bytes(w, line.ptr, j);
 170         advance(&line, j);
 171 
 172         switch (line.ptr[1]) {
 173             case '[':
 174                 j = find_alpha(line);
 175                 if (j < 0) {
 176                     state->skip_alpha = true;
 177                     return;
 178                 }
 179                 advance(&line, j + 1);
 180                 continue;
 181 
 182             case ']':
 183                 j = find_byte(line, '\a');
 184                 if (j < 0) {
 185                     state->skip_bell = true;
 186                     return;
 187                 }
 188                 advance(&line, j + 1);
 189                 continue;
 190         }
 191     }
 192 }
 193 
 194 bool starts_with_bom(slice s) {
 195     const unsigned char* p = s.ptr;
 196     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 197 }
 198 
 199 // handle_lines loops over input lines, restyling all digit-runs as more
 200 // readable `nice numbers`, fulfilling the app's purpose
 201 bool handle_lines(FILE* w, slice* line, FILE* src) {
 202     slice trimmed;
 203     skip_state state;
 204 
 205     trimmed.cap = 0;
 206     state.skip_alpha = false;
 207     state.skip_bell = false;
 208 
 209     for (size_t i = 0; !feof(stdout); i++) {
 210         int len = getline((char**)&line->ptr, &line->cap, src);
 211         if (len < 0) {
 212             break;
 213         }
 214 
 215         if (line->ptr == NULL) {
 216             putc('\n', w);
 217             fprintf(stderr, line_memory_error_msg);
 218             exit(1);
 219         }
 220 
 221         line->len = len;
 222         trimmed.ptr = line->ptr;
 223         trimmed.len = line->len;
 224 
 225         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 226         if (i == 0 && starts_with_bom(trimmed)) {
 227             trimmed.ptr += 3;
 228             trimmed.len -= 3;
 229             len = trimmed.len;
 230         }
 231 
 232         const unsigned char* p = trimmed.ptr;
 233         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 234         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 235             trimmed.len -= 2;
 236         } else if (len >= 1 && p[len - 1] == '\n') {
 237             trimmed.len--;
 238         }
 239 
 240         destyle_line(w, trimmed, &state);
 241         if (!state.skip_alpha && !state.skip_bell) {
 242             putc('\n', w);
 243         }
 244     }
 245 
 246     if (state.skip_alpha || state.skip_bell) {
 247         putc('\n', w);
 248     }
 249     fflush(w);
 250     return true;
 251 }
 252 
 253 // handle_file handles data from the filename given; returns false only when
 254 // the file can't be opened
 255 bool handle_file(FILE* w, slice* line, char* path) {
 256     FILE* f = fopen(path, "rb");
 257     if (f == NULL) {
 258         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 259         return false;
 260     }
 261 
 262     const bool ok = handle_lines(w, line, f);
 263     fclose(f);
 264     return ok;
 265 }
 266 
 267 // run returns the number of errors
 268 int run(int argc, char** argv, FILE* w, slice* line) {
 269     size_t errors = 0;
 270 
 271     // use stdin when not given any filepaths
 272     if (argc < 2) {
 273         if (!handle_lines(w, line, stdin)) {
 274             errors++;
 275         }
 276         return errors;
 277     }
 278 
 279     for (size_t i = 1; i < (size_t)argc && !feof(w); i++) {
 280         if (argv[i][0] == '-' && argv[i][1] == 0) {
 281             // `-` means standard input
 282             if (!handle_lines(w, line, stdin)) {
 283                 errors++;
 284             }
 285             continue;
 286         }
 287 
 288         if (!handle_file(w, line, argv[i])) {
 289             errors++;
 290         }
 291     }
 292 
 293     return errors;
 294 }
 295 
 296 // is_help_option simplifies control-flow for func main
 297 bool is_help_option(char* s) {
 298     return (s[0] == '-') && (
 299         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 300         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 301     );
 302 }
 303 
 304 int main(int argc, char** argv) {
 305 #ifdef _WIN32
 306     setmode(fileno(stdin), O_BINARY);
 307     // ensure output lines end in LF instead of CRLF on windows
 308     setmode(fileno(stdout), O_BINARY);
 309     setmode(fileno(stderr), O_BINARY);
 310 #endif
 311 
 312     // handle any of the help options, if given
 313     if (argc > 1 && is_help_option(argv[1])) {
 314         puts(info);
 315         return 0;
 316     }
 317 
 318     slice line;
 319     init_slice(&line, 32 * 1024);
 320     if (line.ptr == NULL) {
 321         fprintf(stderr, line_memory_error_msg);
 322         return 1;
 323     }
 324 
 325     const int res = run(argc, argv, stdout, &line) == 0 ? 0 : 1;
 326     free(line.ptr);
 327     return res;
 328 }