// File: plain.c

/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./plain ./plain.c
*/

// getline and ssize_t come from POSIX.1-2008, not ISO C: ask for them
// explicitly, so the file also builds when the compiler runs in a strict
// ISO C mode; this must come before any header is included
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// info is the help message shown by the help options
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
"be UTF-8. When not given any filepaths, the standard input is used.\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"";

// no_line_memory_msg is the error shown when the line-buffer can't be
// allocated or grown
const char* no_line_memory_msg = "can't get enough memory to read lines";

// initial_line_capacity is how many bytes the line-buffer starts with;
// getline grows the buffer when a line needs more than that
enum { initial_line_capacity = 32 * 1024 };

// span is a region of bytes in memory
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;

// advance updates a span so it starts after the number of bytes given;
// nothing is checked, so callers must ensure n doesn't exceed the span's len
void advance(span* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// find_esc_pair tries to find the starting index of 2-byte substring "\x1b[",
// only checking bytes from the start index given; the result is the index
// of the ESC byte, or -1 when no such pair is found
int64_t find_esc_pair(span line, size_t start) {
    bool esc = false;

    for (size_t i = start; i < line.len; i++) {
        unsigned char cur = line.ptr[i];

        if (cur == '\x1b') {
            esc = true;
            continue;
        }

        if (esc && cur == '[') {
            // esc is only set by a previous iteration, so i > start here
            return (int64_t)(i - 1);
        }

        esc = false;
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string,
// using isalpha to tell letters apart; the result is -1 when none is found
int64_t find_alpha(span line) {
    for (size_t i = 0; i < line.len; i++) {
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string;
// the result is -1 when the value isn't found
int64_t find_byte(span line, unsigned char what) {
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] == what) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_osc_end tries to find the position of the last byte ending an OSC
// sequence: that's either a BEL byte, or the backslash right after an ESC
// byte; the result is the index of that byte itself, or -1 when none is found
int64_t find_osc_end(span line) {
    unsigned char prev = 0;
    for (size_t i = 0; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];
        if (cur == '\a') {
            return (int64_t)i;
        }
        if (prev == '\x1b' && cur == '\\') {
            return (int64_t)i;
        }
        prev = cur;
    }
    return -1;
}

// write_bytes emits the bytes given to the writer given; write errors aren't
// reported here, but stay detectable via ferror on the writer
void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    if (len > 0) {
        fwrite(src, 1, len, w);
    }
}

// skip_state remembers things across lines
typedef struct skip_state {
    // skip_alpha is set when a line ends while still inside an escape
    // sequence, which means later input must be ignored up to and including
    // its first letter
    bool skip_alpha;
} skip_state;

// destyle_line renders the line given, omitting ANSI-styles: each sequence
// is ignored from its leading "\x1b[" pair up to and including the first
// letter after it; no line-feed is emitted, since that's up to the caller
void destyle_line(FILE* w, span line, skip_state* state) {
    if (state->skip_alpha) {
        // finish ignoring a sequence left unfinished by a previous line
        int64_t j = find_alpha(line);
        if (j < 0) {
            return;
        }
        state->skip_alpha = false;
        advance(&line, (size_t)j + 1);
    }

    while (line.len > 0) {
        int64_t j = find_esc_pair(line, 0);
        if (j < 0) {
            // no more sequences: all that's left is plain-text
            write_bytes(w, line.ptr, line.len);
            return;
        }

        // emit the plain-text before the sequence
        write_bytes(w, line.ptr, (size_t)j);
        advance(&line, (size_t)j);

        // sequences end with their first letter
        j = find_alpha(line);
        if (j < 0) {
            state->skip_alpha = true;
            return;
        }
        advance(&line, (size_t)j + 1);
    }
}

// starts_with_bom checks if the bytes given start with the UTF-8 BOM
bool starts_with_bom(span s) {
    const unsigned char* p = s.ptr;
    return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
}

// handle_lines loops over input lines, emitting them without their ANSI
// sequences, fulfilling the app's purpose; it returns false only when
// there isn't enough memory to read a line; reading stops early when the
// writer has a pending error
bool handle_lines(FILE* w, slice* line, FILE* src) {
    skip_state state = { .skip_alpha = false };

    // the output is what can fail here, so check the writer for errors:
    // the end-of-file indicator isn't meaningful for output streams
    for (size_t i = 0; !ferror(w); i++) {
        errno = 0;
        ssize_t len = getline((char**)&line->ptr, &line->cap, src);
        if (len < 0) {
            // failing for lack of memory must not be mistaken for the
            // end of the input
            if (line->ptr == NULL || errno == ENOMEM) {
                fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
                return false;
            }
            break;
        }

        line->len = (size_t)len;
        span trimmed = { .ptr = line->ptr, .len = line->len };

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (i == 0 && starts_with_bom(trimmed)) {
            advance(&trimmed, 3);
        }

        const unsigned char* p = trimmed.ptr;
        const size_t n = trimmed.len;
        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        if (n >= 2 && p[n - 2] == '\r' && p[n - 1] == '\n') {
            trimmed.len -= 2;
        } else if (n >= 1 && p[n - 1] == '\n') {
            trimmed.len--;
        }

        destyle_line(w, trimmed, &state);
        // a line ending inside a sequence doesn't end the output line
        if (!state.skip_alpha) {
            putc('\n', w);
            fflush(w);
        }
    }

    // end the last output line, if a sequence was left unfinished
    if (state.skip_alpha) {
        putc('\n', w);
    }
    fflush(w);
    return true;
}

// handle_file handles data from the filename given; returns false when the
// file can't be opened, or when handling its lines fails
bool handle_file(FILE* w, slice* line, const char* path) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    const bool ok = handle_lines(w, line, f);
    fclose(f);
    return ok;
}

// run handles all the inputs named in the arguments given, using the
// standard input when no names are given; it returns the number of errors
int run(int argc, char** argv, FILE* w) {
    size_t errors = 0;

    // the same line-buffer is reused across all inputs
    slice line = { .ptr = NULL, .len = 0, .cap = initial_line_capacity };
    line.ptr = malloc(line.cap);

    if (line.ptr == NULL) {
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
        return 1;
    }

    // stop early when the output has failed: the end-of-file indicator
    // isn't meaningful for output streams, so check for errors instead
    for (size_t i = 1; i < (size_t)argc && !ferror(w) && line.ptr != NULL; i++) {
        if (argv[i][0] == '-' && argv[i][1] == 0) {
            // `-` means standard input
            if (!handle_lines(w, &line, stdin)) {
                errors++;
            }
            continue;
        }

        if (!handle_file(w, &line, argv[i])) {
            errors++;
        }
    }

    // use stdin when not given any filepaths
    if (argc < 2) {
        if (!handle_lines(w, &line, stdin)) {
            errors++;
        }
    }

    free(line.ptr);
    return (int)errors;
}

// is_help_option simplifies control-flow for func main
bool is_help_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// main shows the help message when the first argument asks for it;
// otherwise it handles all inputs, exiting with 0 only if no errors happened
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && is_help_option(argv[1])) {
        puts(info);
        return 0;
    }

    return run(argc, argv, stdout) == 0 ? 0 : 1;
}