// File: plain.c
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./plain ./plain.c
*/

// getline is a POSIX.1-2008 function: ask for its declaration explicitly, so
// the app also builds when the compiler runs in a strict ISO C mode
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <ctype.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#endif

// info is the help message shown by any of the help options
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"";

// no_line_memory_msg is the error shown when the line-buffer can't be allocated
const char* no_line_memory_msg = "can't get enough memory to read lines";

// initial_line_capacity is how many bytes the line-buffer starts with
enum { initial_line_capacity = 32 * 1024 };

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// advance updates a slice so it starts after the number of bytes given; the
// caller must ensure n doesn't exceed the slice's current length
//
// this func is `static inline`, since a plain `inline` definition provides no
// external definition in C99 and later, which breaks unoptimized builds
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// find_esc_pair tries to find the starting index of 2-byte substrings
// "\x1b[" or "\x1b]", whichever comes first, if at all; the search begins
// at index `start`, and the result is -1 when neither pair is found
int64_t find_esc_pair(slice line, size_t start) {
    bool esc = false;

    for (size_t i = start; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];

        if (cur == '\x1b') {
            esc = true;
            continue;
        }

        if (esc && (cur == '[' || cur == ']')) {
            // the pair starts at the escape byte right before this one
            return (int64_t)(i - 1);
        }

        esc = false;
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string,
// returning -1 when there are no letters
int64_t find_alpha(slice line) {
    for (size_t i = 0; i < line.len; i++) {
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string,
// returning -1 when the value isn't there
int64_t find_byte(slice line, unsigned char what) {
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] == what) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_osc_end tries to find the position of the last byte of an OSC
// terminator: that's either the first bell byte ('\a'), or the backslash
// from the first "\x1b\\" pair, whichever comes first; the result is -1
// when no terminator is found
int64_t find_osc_end(slice line) {
    unsigned char prev = 0;

    for (size_t i = 0; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];

        if (cur == '\a') {
            return (int64_t)i;
        }
        if (prev == '\x1b' && cur == '\\') {
            return (int64_t)i;
        }
        prev = cur;
    }

    return -1;
}

// write_bytes emits the bytes given to the output given, doing nothing when
// there are no bytes to write
static inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    if (len > 0) {
        fwrite(src, len, 1, w);
    }
}

// skip_state remembers which kind of ANSI sequence, if any, was left
// unfinished at the end of a line, so skipping can resume on later lines
typedef struct skip_state {
    // skip_alpha means a "\x1b[" sequence is still waiting for its letter
    bool skip_alpha;

    // skip_osc means a "\x1b]" sequence is still waiting for its terminator
    bool skip_osc;
} skip_state;

// destyle_line renders the line given, omitting ANSI-styles; sequences left
// unfinished by previous lines are skipped first, and any sequence this line
// leaves unfinished is remembered in the state given
void destyle_line(FILE* w, slice line, skip_state* state) {
    if (state->skip_alpha) {
        const int64_t end = find_alpha(line);
        if (end < 0) {
            // the whole line is still part of the unfinished sequence
            return;
        }
        state->skip_alpha = false;
        advance(&line, (size_t)end + 1);
    }

    if (state->skip_osc) {
        const int64_t end = find_osc_end(line);
        if (end < 0) {
            // the whole line is still part of the unfinished sequence
            return;
        }
        state->skip_osc = false;
        advance(&line, (size_t)end + 1);
    }

    while (line.len > 0) {
        const int64_t start = find_esc_pair(line, 0);
        if (start < 0) {
            // no more sequences: the rest of the line is all plain-text
            write_bytes(w, line.ptr, line.len);
            return;
        }

        // emit the plain-text before the sequence, then move onto the latter
        write_bytes(w, line.ptr, (size_t)start);
        advance(&line, (size_t)start);

        // find_esc_pair only matches pairs ending in either `[` or `]`, so
        // the slice has at least 2 bytes here, the 2nd one telling which
        const bool is_csi = line.ptr[1] == '[';
        const int64_t end = is_csi ? find_alpha(line) : find_osc_end(line);

        if (end < 0) {
            // the sequence keeps going beyond the end of this line
            if (is_csi) {
                state->skip_alpha = true;
            } else {
                state->skip_osc = true;
            }
            return;
        }

        advance(&line, (size_t)end + 1);
    }
}

// starts_with_bom checks if a slice starts with the 3-byte UTF-8 BOM
bool starts_with_bom(slice s) {
    const unsigned char* p = s.ptr;
    return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
}

// handle_lines loops over input lines, emitting each without its ANSI
// sequences, fulfilling the app's purpose; a leading UTF-8 BOM on the first
// line is ignored, and all output lines end with a line-feed; the line-buffer
// given is reused and may be reallocated; this func always returns true
bool handle_lines(FILE* w, slice* line, FILE* src) {
    skip_state state = { .skip_alpha = false, .skip_osc = false };
    bool first_line = true;

    // stop early when the output has failed, since end-of-file checks are
    // meaningless for output streams
    while (!ferror(w)) {
        // use a proper `char*` variable for getline, instead of type-punning
        // the slice's `unsigned char*` field
        char* buf = (char*)line->ptr;
        const int64_t got = getline(&buf, &line->cap, src);
        // getline may have moved the buffer, even when it fails
        line->ptr = (unsigned char*)buf;

        if (got < 0) {
            break;
        }

        if (line->ptr == NULL) {
            // defensive check, in case no buffer is available after reading
            putc('\n', w);
            fflush(w);

            fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
            exit(1);
        }

        line->len = (size_t)got;

        slice trimmed = { .ptr = line->ptr, .len = line->len, .cap = 0 };

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (first_line && starts_with_bom(trimmed)) {
            advance(&trimmed, 3);
        }
        first_line = false;

        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        const unsigned char* p = trimmed.ptr;
        const size_t n = trimmed.len;
        if (n >= 2 && p[n - 2] == '\r' && p[n - 1] == '\n') {
            trimmed.len -= 2;
        } else if (n >= 1 && p[n - 1] == '\n') {
            trimmed.len--;
        }

        destyle_line(w, trimmed, &state);

        // line-feeds inside unfinished sequences are part of what's skipped
        if (!state.skip_alpha && !state.skip_osc) {
            putc('\n', w);
            fflush(w);
        }
    }

    // end the last output line, if an unfinished sequence left it hanging
    if (state.skip_alpha || state.skip_osc) {
        putc('\n', w);
    }
    fflush(w);
    return true;
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(FILE* w, slice* line, char* path) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    const bool ok = handle_lines(w, line, f);
    fclose(f);
    return ok;
}

// run handles all inputs named by the command-line arguments given, using
// the standard input when there are none, or when a name is a single dash;
// it returns the number of errors
int run(int argc, char** argv, FILE* w) {
    int errors = 0;

    slice line;
    line.len = 0;
    line.cap = initial_line_capacity;
    line.ptr = malloc(line.cap);

    if (line.ptr == NULL) {
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
        return 1;
    }

    if (argc < 2) {
        // use stdin when not given any filepaths
        if (!handle_lines(w, &line, stdin)) {
            errors++;
        }
    } else {
        // stop early when the output has failed
        for (int i = 1; i < argc && !ferror(w); i++) {
            const char* name = argv[i];
            // `-` means standard input
            const bool use_stdin = name[0] == '-' && name[1] == 0;

            const bool ok = use_stdin ?
                handle_lines(w, &line, stdin) :
                handle_file(w, &line, argv[i]);
            if (!ok) {
                errors++;
            }
        }
    }

    // all paths end up here, so the line-buffer is always released
    free(line.ptr);
    return errors;
}

// is_help_option simplifies control-flow for func main
bool is_help_option(char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
    );
}

// main shows the help message when the first argument asks for it, or
// handles all inputs otherwise; the exit code is 0 only when no errors happen
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && is_help_option(argv[1])) {
        puts(info);
        return 0;
    }

    return run(argc, argv, stdout) == 0 ? 0 : 1;
}