File: plain.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./plain ./plain.c
  29 */
  30 
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  38 
  39 #ifdef _WIN32
  40 #include <windows.h>
  41 #endif
  42 
  43 // info is the message shown when this app is given any of its help options
  44 const char* info = ""
  45 "plain [options...] [filepaths...]\n"
  46 "\n"
  47 "\n"
  48 "Ignore all ANSI codes, leaving just the plain-text.\n"
  49 "\n"
  50 "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
  51 "from the standard input.\n"
  52 "\n"
  53 "\n"
  54 "Options, all of which can start with either 1 or 2 dashes:\n"
  55 "\n"
  56 "\n"
  57 "  -h          show this help message\n"
  58 "  -help       show this help message\n"
  59 "";
  60 
  61 const char* line_memory_error_msg =
  62     ""
  63     "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";
  64 
  65 // slice is a growable region of bytes in memory
  66 typedef struct slice {
  67     // ptr is the starting place of the region
  68     unsigned char* ptr;
  69 
  70     // len is how many bytes are currently being used
  71     size_t len;
  72 
  73     // cap is how many bytes the memory region has available
  74     size_t cap;
  75 } slice;
  76 
  77 // init_slice is the constructor for type slice
  78 void init_slice(slice* s, size_t cap) {
  79     s->ptr = malloc(cap);
  80     s->len = 0;
  81     s->cap = cap;
  82 }
  83 
  84 // advance updates a slice so it starts after the number of bytes given
  85 inline void advance(slice* src, size_t n) {
  86     src->ptr += n;
  87     src->len -= n;
  88 }
  89 
  90 // find_esc_pair tries to find the starting index of 2-byte substrings
  91 // "\x1b[" or "\x1b]", whichever comes first, if at all
  92 int64_t find_esc_pair(slice line, size_t start) {
  93     bool esc = false;
  94 
  95     for (size_t i = start; i < line.len; i++) {
  96         unsigned char cur = line.ptr[i];
  97 
  98         if (cur == '\x1b') {
  99             esc = true;
 100             continue;
 101         }
 102 
 103         if (esc && (cur == '[' || cur == ']')) {
 104             return i - 1;
 105         }
 106 
 107         esc = false;
 108     }
 109 
 110     return -1;
 111 }
 112 
 113 // find_alpha tries to find the position of the first letter in a string
 114 int64_t find_alpha(slice line) {
 115     for (size_t i = 0; i < line.len; i++) {
 116         if (isalpha(line.ptr[i])) {
 117             return i;
 118         }
 119     }
 120     return -1;
 121 }
 122 
 123 // find_byte tries to find the first position of the value given in a string
 124 int64_t find_byte(slice line, unsigned char what) {
 125     for (size_t i = 0; i < line.len; i++) {
 126         if (line.ptr[i] == what) {
 127             return i;
 128         }
 129     }
 130     return -1;
 131 }
 132 
 133 inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
 134     fwrite(src, len, 1, w);
 135 }
 136 
 137 typedef struct skip_state {
 138     bool skip_alpha;
 139     bool skip_bell;
 140 } skip_state;
 141 
 142 // destyle_line renders the line given, omitting ANSI-styles
 143 void destyle_line(FILE* w, slice line, skip_state* state) {
 144     if (state->skip_alpha) {
 145         int64_t j = find_alpha(line);
 146         if (j < 0) {
 147             return;
 148         }
 149         state->skip_alpha = false;
 150         advance(&line, j + 1);
 151     }
 152 
 153     if (state->skip_bell) {
 154         int64_t j = find_byte(line, '\a');
 155         if (j < 0) {
 156             return;
 157         }
 158         state->skip_bell = false;
 159         advance(&line, j + 1);
 160     }
 161 
 162     while (line.len > 0) {
 163         int64_t j = find_esc_pair(line, 0);
 164         if (j < 0) {
 165             write_bytes(w, line.ptr, line.len);
 166             return;
 167         }
 168 
 169         write_bytes(w, line.ptr, j);
 170         advance(&line, j);
 171 
 172         switch (line.ptr[1]) {
 173             case '[':
 174                 j = find_alpha(line);
 175                 if (j < 0) {
 176                     state->skip_alpha = true;
 177                     return;
 178                 }
 179                 advance(&line, j + 1);
 180                 continue;
 181 
 182             case ']':
 183                 j = find_byte(line, '\a');
 184                 if (j < 0) {
 185                     state->skip_bell = true;
 186                     return;
 187                 }
 188                 advance(&line, j + 1);
 189                 continue;
 190         }
 191     }
 192 }
 193 
 194 bool starts_with_bom(slice s) {
 195     const unsigned char* p = s.ptr;
 196     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 197 }
 198 
 199 // handle_lines loops over input lines, restyling all digit-runs as more
 200 // readable `nice numbers`, fulfilling the app's purpose
 201 bool handle_lines(FILE* w, slice* line, FILE* src) {
 202     slice trimmed;
 203     skip_state state;
 204 
 205     trimmed.cap = 0;
 206     state.skip_alpha = false;
 207     state.skip_bell = false;
 208 
 209     for (size_t i = 0; !feof(stdout); i++) {
 210         int len = getline((char**)&line->ptr, &line->cap, src);
 211         if (len < 0) {
 212             break;
 213         }
 214 
 215         if (line->ptr == NULL) {
 216             putc('\n', w);
 217             fflush(w);
 218 
 219             fprintf(stderr, line_memory_error_msg);
 220             exit(1);
 221         }
 222 
 223         line->len = len;
 224         trimmed.ptr = line->ptr;
 225         trimmed.len = line->len;
 226 
 227         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 228         if (i == 0 && starts_with_bom(trimmed)) {
 229             trimmed.ptr += 3;
 230             trimmed.len -= 3;
 231             len = trimmed.len;
 232         }
 233 
 234         const unsigned char* p = trimmed.ptr;
 235         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 236         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 237             trimmed.len -= 2;
 238         } else if (len >= 1 && p[len - 1] == '\n') {
 239             trimmed.len--;
 240         }
 241 
 242         destyle_line(w, trimmed, &state);
 243         if (!state.skip_alpha && !state.skip_bell) {
 244             putc('\n', w);
 245             fflush(w);
 246         }
 247     }
 248 
 249     if (state.skip_alpha || state.skip_bell) {
 250         putc('\n', w);
 251     }
 252     fflush(w);
 253     return true;
 254 }
 255 
 256 // handle_file handles data from the filename given; returns false only when
 257 // the file can't be opened
 258 bool handle_file(FILE* w, slice* line, char* path) {
 259     FILE* f = fopen(path, "rb");
 260     if (f == NULL) {
 261         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 262         return false;
 263     }
 264 
 265     const bool ok = handle_lines(w, line, f);
 266     fclose(f);
 267     return ok;
 268 }
 269 
 270 // run returns the number of errors
 271 int run(int argc, char** argv, FILE* w, slice* line) {
 272     size_t errors = 0;
 273 
 274     // use stdin when not given any filepaths
 275     if (argc < 2) {
 276         if (!handle_lines(w, line, stdin)) {
 277             errors++;
 278         }
 279         return errors;
 280     }
 281 
 282     for (size_t i = 1; i < (size_t)argc && !feof(w); i++) {
 283         if (argv[i][0] == '-' && argv[i][1] == 0) {
 284             // `-` means standard input
 285             if (!handle_lines(w, line, stdin)) {
 286                 errors++;
 287             }
 288             continue;
 289         }
 290 
 291         if (!handle_file(w, line, argv[i])) {
 292             errors++;
 293         }
 294     }
 295 
 296     return errors;
 297 }
 298 
 299 // is_help_option simplifies control-flow for func main
 300 bool is_help_option(char* s) {
 301     return (s[0] == '-') && (
 302         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 303         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 304     );
 305 }
 306 
 307 int main(int argc, char** argv) {
 308 #ifdef _WIN32
 309     setmode(fileno(stdin), O_BINARY);
 310     // ensure output lines end in LF instead of CRLF on windows
 311     setmode(fileno(stdout), O_BINARY);
 312     setmode(fileno(stderr), O_BINARY);
 313 #endif
 314 
 315     // handle any of the help options, if given
 316     if (argc > 1 && is_help_option(argv[1])) {
 317         puts(info);
 318         return 0;
 319     }
 320 
 321     slice line;
 322     init_slice(&line, 32 * 1024);
 323     if (line.ptr == NULL) {
 324         fprintf(stderr, line_memory_error_msg);
 325         return 1;
 326     }
 327 
 328     const int res = run(argc, argv, stdout, &line) == 0 ? 0 : 1;
 329     free(line.ptr);
 330     return res;
 331 }