File: plain.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./plain ./plain.c
  29 */
  30 
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  37 
  38 #ifdef _WIN32
  39 #include <fcntl.h>
  40 #include <windows.h>
  41 #endif
  42 
// info is the help message, which func main shows when the first
// command-line argument is any of the help options
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
"be UTF-8. When not given any filepaths, the standard input is used.\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"  -h          show this help message\n"
"  -help       show this help message\n"
"";

// no_line_memory_msg is the error message shown on stderr when the buffer
// used to read lines can't be allocated
const char* no_line_memory_msg = "can't get enough memory to read lines";
  56 
// span is a region of bytes in memory: it's a pointer/length pair, which
// funcs in this file take by value to scan (parts of) a line
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region, starting from ptr
    size_t len;
} span;
  65 
  66 // advance updates a span so it starts after the number of bytes given
  67 void advance(span* src, size_t n) {
  68     src->ptr += n;
  69     src->len -= n;
  70 }
  71 
// slice is a growable region of bytes in memory; func run allocates one, and
// func handle_lines hands its ptr/cap fields to getline, which may update
// both as it reads lines
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;
  83 
  84 // find_esc_pair tries to find the starting index of 2-byte substring "\x1b["
  85 int64_t find_esc_pair(span line, size_t start) {
  86     bool esc = false;
  87 
  88     for (size_t i = start; i < line.len; i++) {
  89         unsigned char cur = line.ptr[i];
  90 
  91         if (cur == '\x1b') {
  92             esc = true;
  93             continue;
  94         }
  95 
  96         if (esc && cur == '[') {
  97             return i - 1;
  98         }
  99 
 100         esc = false;
 101     }
 102 
 103     return -1;
 104 }
 105 
 106 // find_alpha tries to find the position of the first letter in a string
 107 int64_t find_alpha(span line) {
 108     for (size_t i = 0; i < line.len; i++) {
 109         if (isalpha(line.ptr[i])) {
 110             return i;
 111         }
 112     }
 113     return -1;
 114 }
 115 
 116 // find_byte tries to find the first position of the value given in a string
 117 int64_t find_byte(span line, unsigned char what) {
 118     for (size_t i = 0; i < line.len; i++) {
 119         if (line.ptr[i] == what) {
 120             return i;
 121         }
 122     }
 123     return -1;
 124 }
 125 
 126 // find_osc_end tries to find the first position after the end of OSC bytes
 127 int64_t find_osc_end(span line) {
 128     size_t prev = 0;
 129     for (size_t i = 0; i < line.len; i++) {
 130         if (line.ptr[i] == '\a') {
 131             return i;
 132         }
 133         if (prev == '\x1b' && line.ptr[i] == '\\') {
 134             return i;
 135         }
 136         prev = line.ptr[i];
 137     }
 138     return -1;
 139 }
 140 
 141 void write_bytes(FILE* w, const unsigned char* src, size_t len) {
 142     fwrite(src, len, 1, w);
 143 }
 144 
// skip_state is what func destyle_line remembers across the lines it's given
typedef struct skip_state {
    // skip_alpha is set when a line ends before the letter which would end
    // an escape sequence is found: while set, the following line(s) are
    // ignored up to and including their first letter
    bool skip_alpha;
} skip_state;
 148 
 149 // destyle_line renders the line given, omitting ANSI-styles
 150 void destyle_line(FILE* w, span line, skip_state* state) {
 151     if (state->skip_alpha) {
 152         int64_t j = find_alpha(line);
 153         if (j < 0) {
 154             return;
 155         }
 156         state->skip_alpha = false;
 157         advance(&line, j + 1);
 158     }
 159 
 160     while (line.len > 0) {
 161         int64_t j = find_esc_pair(line, 0);
 162         if (j < 0) {
 163             write_bytes(w, line.ptr, line.len);
 164             return;
 165         }
 166 
 167         write_bytes(w, line.ptr, j);
 168         advance(&line, j);
 169 
 170         j = find_alpha(line);
 171         if (j < 0) {
 172             state->skip_alpha = true;
 173             return;
 174         }
 175         advance(&line, j + 1);
 176     }
 177 }
 178 
 179 bool starts_with_bom(span s) {
 180     const unsigned char* p = s.ptr;
 181     return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
 182 }
 183 
 184 // handle_lines loops over input lines, restyling all digit-runs as more
 185 // readable `nice numbers`, fulfilling the app's purpose
 186 bool handle_lines(FILE* w, slice* line, FILE* src) {
 187     span trimmed;
 188     skip_state state;
 189     state.skip_alpha = false;
 190 
 191     for (size_t i = 0; !feof(stdout); i++) {
 192         ssize_t len = getline((char**)&line->ptr, &line->cap, src);
 193         if (len < 0) {
 194             break;
 195         }
 196 
 197         if (line->ptr == NULL) {
 198             fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 199             return false;
 200         }
 201 
 202         line->len = len;
 203         trimmed.ptr = line->ptr;
 204         trimmed.len = line->len;
 205 
 206         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
 207         if (i == 0 && starts_with_bom(trimmed)) {
 208             trimmed.ptr += 3;
 209             trimmed.len -= 3;
 210             len = trimmed.len;
 211         }
 212 
 213         const unsigned char* p = trimmed.ptr;
 214         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 215         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 216             trimmed.len -= 2;
 217         } else if (len >= 1 && p[len - 1] == '\n') {
 218             trimmed.len--;
 219         }
 220 
 221         destyle_line(w, trimmed, &state);
 222         if (!state.skip_alpha) {
 223             putc('\n', w);
 224             fflush(w);
 225         }
 226     }
 227 
 228     if (state.skip_alpha) {
 229         putc('\n', w);
 230     }
 231     fflush(w);
 232     return true;
 233 }
 234 
 235 // handle_file handles data from the filename given; returns false only when
 236 // the file can't be opened
 237 bool handle_file(FILE* w, slice* line, const char* path) {
 238     FILE* f = fopen(path, "rb");
 239     if (f == NULL) {
 240         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 241         return false;
 242     }
 243 
 244     const bool ok = handle_lines(w, line, f);
 245     fclose(f);
 246     return ok;
 247 }
 248 
 249 // run returns the number of errors
 250 int run(int argc, char** argv, FILE* w) {
 251     size_t errors = 0;
 252 
 253     slice line;
 254     line.len = 0;
 255     line.cap = 32 * 1024;
 256     line.ptr = malloc(line.cap);
 257 
 258     if (line.ptr == NULL) {
 259         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 260         return 1;
 261     }
 262 
 263     for (size_t i = 1; i < (size_t)argc && !feof(w) && line.ptr != NULL; i++) {
 264         if (argv[i][0] == '-' && argv[i][1] == 0) {
 265             // `-` means standard input
 266             if (!handle_lines(w, &line, stdin)) {
 267                 errors++;
 268             }
 269             continue;
 270         }
 271 
 272         if (!handle_file(w, &line, argv[i])) {
 273             errors++;
 274         }
 275     }
 276 
 277     // use stdin when not given any filepaths
 278     if (argc < 2) {
 279         if (!handle_lines(w, &line, stdin)) {
 280             errors++;
 281         }
 282     }
 283 
 284     free(line.ptr);
 285     return errors;
 286 }
 287 
 288 // is_help_option simplifies control-flow for func main
 289 bool is_help_option(const char* s) {
 290     return (s[0] == '-') && (
 291         strcmp(s, "-h") == 0 ||
 292         strcmp(s, "-help") == 0 ||
 293         strcmp(s, "--h") == 0 ||
 294         strcmp(s, "--help") == 0
 295     );
 296 }
 297 
 298 int main(int argc, char** argv) {
 299 #ifdef _WIN32
 300     setmode(fileno(stdin), O_BINARY);
 301     // ensure output lines end in LF instead of CRLF on windows
 302     setmode(fileno(stdout), O_BINARY);
 303     setmode(fileno(stderr), O_BINARY);
 304 #endif
 305 
 306     // handle any of the help options, if given
 307     if (argc > 1 && is_help_option(argv[1])) {
 308         puts(info);
 309         return 0;
 310     }
 311 
 312     return run(argc, argv, stdout) == 0 ? 0 : 1;
 313 }