/* File: plain.c */

/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./plain ./plain.c
*/

#include <ctype.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#endif

// info is the message shown when this app is given any of its help options
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"";

// line_memory_error_msg is the message shown when the line buffer can't be
// allocated; it has no format directives, so emit it verbatim
const char* line_memory_error_msg =
    ""
    "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// init_slice is the constructor for type slice: it allocates `cap` bytes and
// starts with no bytes in use; s->ptr is NULL when the allocation fails, so
// callers must check it before using the slice
void init_slice(slice* s, size_t cap) {
    s->ptr = malloc(cap);
    s->len = 0;
    s->cap = cap;
}

// advance updates a slice so it starts after the number of bytes given; the
// caller must ensure n <= src->len, and must not pass the advanced slice to
// free, since its ptr no longer matches the allocation's start
//
// this is `static inline` on purpose: a plain `inline` definition emits no
// external symbol, so the program fails to link whenever a call isn't inlined
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// find_esc_pair tries to find the starting index of 2-byte substrings
// "\x1b[" or "\x1b]", whichever comes first, if at all; the search begins
// at index `start`, and the result is -1 when no such pair is found
int64_t find_esc_pair(slice line, size_t start) {
    // prev_esc remembers whether the byte right before the current one was ESC
    bool prev_esc = false;

    for (size_t i = start; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];

        if (cur == '\x1b') {
            prev_esc = true;
            continue;
        }

        if (prev_esc && (cur == '[' || cur == ']')) {
            // i >= 1 here, since an ESC byte was seen at index i - 1
            return (int64_t)i - 1;
        }

        prev_esc = false;
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string,
// as classified by isalpha; the result is -1 when there are no letters
int64_t find_alpha(slice line) {
    for (size_t i = 0; i < line.len; i++) {
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string;
// the result is -1 when the value isn't found
int64_t find_byte(slice line, unsigned char what) {
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] == what) {
            return (int64_t)i;
        }
    }
    return -1;
}

inline void write_bytes(FILE* w, const unsigned char* src, size_t len) { 134 fwrite(src, len, 1, w); 135 } 136 137 typedef struct skip_state { 138 bool skip_alpha; 139 bool skip_bell; 140 } skip_state; 141 142 // destyle_line renders the line given, omitting ANSI-styles 143 void destyle_line(FILE* w, slice line, skip_state* state) { 144 if (state->skip_alpha) { 145 int64_t j = find_alpha(line); 146 if (j < 0) { 147 return; 148 } 149 state->skip_alpha = false; 150 advance(&line, j + 1); 151 } 152 153 if (state->skip_bell) { 154 int64_t j = find_byte(line, '\a'); 155 if (j < 0) { 156 return; 157 } 158 state->skip_bell = false; 159 advance(&line, j + 1); 160 } 161 162 while (line.len > 0) { 163 int64_t j = find_esc_pair(line, 0); 164 if (j < 0) { 165 write_bytes(w, line.ptr, line.len); 166 return; 167 } 168 169 write_bytes(w, line.ptr, j); 170 advance(&line, j); 171 172 switch (line.ptr[1]) { 173 case '[': 174 j = find_alpha(line); 175 if (j < 0) { 176 state->skip_alpha = true; 177 return; 178 } 179 advance(&line, j + 1); 180 continue; 181 182 case ']': 183 j = find_byte(line, '\a'); 184 if (j < 0) { 185 state->skip_bell = true; 186 return; 187 } 188 advance(&line, j + 1); 189 continue; 190 } 191 } 192 } 193 194 bool starts_with_bom(slice s) { 195 const unsigned char* p = s.ptr; 196 return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf; 197 } 198 199 // handle_lines loops over input lines, restyling all digit-runs as more 200 // readable `nice numbers`, fulfilling the app's purpose 201 bool handle_lines(FILE* w, slice* line, FILE* src) { 202 slice trimmed; 203 skip_state state; 204 205 trimmed.cap = 0; 206 state.skip_alpha = false; 207 state.skip_bell = false; 208 209 for (size_t i = 0; !feof(stdout); i++) { 210 int len = getline((char**)&line->ptr, &line->cap, src); 211 if (len < 0) { 212 break; 213 } 214 215 if (line->ptr == NULL) { 216 putc('\n', w); 217 fprintf(stderr, line_memory_error_msg); 218 exit(1); 219 } 220 221 line->len = len; 222 trimmed.ptr 
= line->ptr; 223 trimmed.len = line->len; 224 225 // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it 226 if (i == 0 && starts_with_bom(trimmed)) { 227 trimmed.ptr += 3; 228 trimmed.len -= 3; 229 len = trimmed.len; 230 } 231 232 const unsigned char* p = trimmed.ptr; 233 // get rid of trailing line-feeds and CRLF end-of-line byte-pairs 234 if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') { 235 trimmed.len -= 2; 236 } else if (len >= 1 && p[len - 1] == '\n') { 237 trimmed.len--; 238 } 239 240 destyle_line(w, trimmed, &state); 241 if (!state.skip_alpha && !state.skip_bell) { 242 putc('\n', w); 243 } 244 } 245 246 if (state.skip_alpha || state.skip_bell) { 247 putc('\n', w); 248 } 249 fflush(w); 250 return true; 251 } 252 253 // handle_file handles data from the filename given; returns false only when 254 // the file can't be opened 255 bool handle_file(FILE* w, slice* line, char* path) { 256 FILE* f = fopen(path, "rb"); 257 if (f == NULL) { 258 fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path); 259 return false; 260 } 261 262 const bool ok = handle_lines(w, line, f); 263 fclose(f); 264 return ok; 265 } 266 267 // run returns the number of errors 268 int run(int argc, char** argv, FILE* w, slice* line) { 269 size_t errors = 0; 270 271 // use stdin when not given any filepaths 272 if (argc < 2) { 273 if (!handle_lines(w, line, stdin)) { 274 errors++; 275 } 276 return errors; 277 } 278 279 for (size_t i = 1; i < (size_t)argc && !feof(w); i++) { 280 if (argv[i][0] == '-' && argv[i][1] == 0) { 281 // `-` means standard input 282 if (!handle_lines(w, line, stdin)) { 283 errors++; 284 } 285 continue; 286 } 287 288 if (!handle_file(w, line, argv[i])) { 289 errors++; 290 } 291 } 292 293 return errors; 294 } 295 296 // is_help_option simplifies control-flow for func main 297 bool is_help_option(char* s) { 298 return (s[0] == '-') && ( 299 strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 || 300 strcmp(s, "--h") == 0 || strcmp(s, 
"--help") == 0 301 ); 302 } 303 304 int main(int argc, char** argv) { 305 #ifdef _WIN32 306 setmode(fileno(stdin), O_BINARY); 307 // ensure output lines end in LF instead of CRLF on windows 308 setmode(fileno(stdout), O_BINARY); 309 setmode(fileno(stderr), O_BINARY); 310 #endif 311 312 // handle any of the help options, if given 313 if (argc > 1 && is_help_option(argv[1])) { 314 puts(info); 315 return 0; 316 } 317 318 slice line; 319 init_slice(&line, 32 * 1024); 320 if (line.ptr == NULL) { 321 fprintf(stderr, line_memory_error_msg); 322 return 1; 323 } 324 325 const int res = run(argc, argv, stdout, &line) == 0 ? 0 : 1; 326 free(line.ptr); 327 return res; 328 }