// File: plain.c
/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./plain ./plain.c
*/

#include <ctype.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#endif

// info is the message shown when this app is given any of its help options
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text.\n"
"\n"
"All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
"from the standard input.\n"
"\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"";

// line_memory_error_msg is the message shown when the line buffer can't be
// allocated; it has no `%` in it, but should still be emitted via fputs
const char* line_memory_error_msg =
    ""
    "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// init_slice is the constructor for type slice: it tries to allocate `cap`
// bytes, leaving s->ptr NULL when malloc fails, so callers must check it
void init_slice(slice* s, size_t cap) {
    s->ptr = malloc(cap);
    s->len = 0;
    s->cap = cap;
}

// advance updates a slice so it starts after the number of bytes given;
// callers must ensure n <= src->len, since no bounds-check is done here
//
// it's `static inline` because a plain `inline` definition doesn't provide
// an external definition in C99 and later, which fails to link whenever the
// compiler chooses not to inline the call
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// find_esc_pair tries to find the starting index of 2-byte substrings
// "\x1b[" or "\x1b]", whichever comes first, if at all; the search starts
// at index `start`, and the result is -1 when no such pair is found
int64_t find_esc_pair(slice line, size_t start) {
    // esc remembers whether the previous byte was an escape
    bool esc = false;

    for (size_t i = start; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];

        if (cur == '\x1b') {
            esc = true;
            continue;
        }

        if (esc && (cur == '[' || cur == ']')) {
            // esc being set guarantees i >= 1 here
            return (int64_t)(i - 1);
        }

        esc = false;
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string;
// the result is -1 when there are no letters
int64_t find_alpha(slice line) {
    for (size_t i = 0; i < line.len; i++) {
        // bytes are already unsigned, as the ctype funcs require
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string;
// the result is -1 when the value isn't found
int64_t find_byte(slice line, unsigned char what) {
    // avoid calling memchr on a possibly-null pointer from an empty slice
    if (line.len == 0) {
        return -1;
    }

    const unsigned char* hit = memchr(line.ptr, what, line.len);
    return hit != NULL ? (int64_t)(hit - line.ptr) : -1;
}

inline void write_bytes(FILE* w, const unsigned char* src, size_t len) { 134 fwrite(src, len, 1, w); 135 } 136 137 typedef struct skip_state { 138 bool skip_alpha; 139 bool skip_bell; 140 } skip_state; 141 142 // destyle_line renders the line given, omitting ANSI-styles 143 void destyle_line(FILE* w, slice line, skip_state* state) { 144 if (state->skip_alpha) { 145 int64_t j = find_alpha(line); 146 if (j < 0) { 147 return; 148 } 149 state->skip_alpha = false; 150 advance(&line, j + 1); 151 } 152 153 if (state->skip_bell) { 154 int64_t j = find_byte(line, '\a'); 155 if (j < 0) { 156 return; 157 } 158 state->skip_bell = false; 159 advance(&line, j + 1); 160 } 161 162 while (line.len > 0) { 163 int64_t j = find_esc_pair(line, 0); 164 if (j < 0) { 165 write_bytes(w, line.ptr, line.len); 166 return; 167 } 168 169 write_bytes(w, line.ptr, j); 170 advance(&line, j); 171 172 switch (line.ptr[1]) { 173 case '[': 174 j = find_alpha(line); 175 if (j < 0) { 176 state->skip_alpha = true; 177 return; 178 } 179 advance(&line, j + 1); 180 continue; 181 182 case ']': 183 j = find_byte(line, '\a'); 184 if (j < 0) { 185 state->skip_bell = true; 186 return; 187 } 188 advance(&line, j + 1); 189 continue; 190 } 191 } 192 } 193 194 bool starts_with_bom(slice s) { 195 const unsigned char* p = s.ptr; 196 return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf; 197 } 198 199 // handle_lines loops over input lines, restyling all digit-runs as more 200 // readable `nice numbers`, fulfilling the app's purpose 201 bool handle_lines(FILE* w, slice* line, FILE* src) { 202 slice trimmed; 203 skip_state state; 204 205 trimmed.cap = 0; 206 state.skip_alpha = false; 207 state.skip_bell = false; 208 209 for (size_t i = 0; !feof(stdout); i++) { 210 int len = getline((char**)&line->ptr, &line->cap, src); 211 if (len < 0) { 212 break; 213 } 214 215 if (line->ptr == NULL) { 216 putc('\n', w); 217 fflush(w); 218 219 fprintf(stderr, line_memory_error_msg); 220 exit(1); 221 } 222 223 line->len = 
len; 224 trimmed.ptr = line->ptr; 225 trimmed.len = line->len; 226 227 // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it 228 if (i == 0 && starts_with_bom(trimmed)) { 229 trimmed.ptr += 3; 230 trimmed.len -= 3; 231 len = trimmed.len; 232 } 233 234 const unsigned char* p = trimmed.ptr; 235 // get rid of trailing line-feeds and CRLF end-of-line byte-pairs 236 if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') { 237 trimmed.len -= 2; 238 } else if (len >= 1 && p[len - 1] == '\n') { 239 trimmed.len--; 240 } 241 242 destyle_line(w, trimmed, &state); 243 if (!state.skip_alpha && !state.skip_bell) { 244 putc('\n', w); 245 fflush(w); 246 } 247 } 248 249 if (state.skip_alpha || state.skip_bell) { 250 putc('\n', w); 251 } 252 fflush(w); 253 return true; 254 } 255 256 // handle_file handles data from the filename given; returns false only when 257 // the file can't be opened 258 bool handle_file(FILE* w, slice* line, char* path) { 259 FILE* f = fopen(path, "rb"); 260 if (f == NULL) { 261 fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path); 262 return false; 263 } 264 265 const bool ok = handle_lines(w, line, f); 266 fclose(f); 267 return ok; 268 } 269 270 // run returns the number of errors 271 int run(int argc, char** argv, FILE* w, slice* line) { 272 size_t errors = 0; 273 274 // use stdin when not given any filepaths 275 if (argc < 2) { 276 if (!handle_lines(w, line, stdin)) { 277 errors++; 278 } 279 return errors; 280 } 281 282 for (size_t i = 1; i < (size_t)argc && !feof(w); i++) { 283 if (argv[i][0] == '-' && argv[i][1] == 0) { 284 // `-` means standard input 285 if (!handle_lines(w, line, stdin)) { 286 errors++; 287 } 288 continue; 289 } 290 291 if (!handle_file(w, line, argv[i])) { 292 errors++; 293 } 294 } 295 296 return errors; 297 } 298 299 // is_help_option simplifies control-flow for func main 300 bool is_help_option(char* s) { 301 return (s[0] == '-') && ( 302 strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 || 303 
strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0 304 ); 305 } 306 307 int main(int argc, char** argv) { 308 #ifdef _WIN32 309 setmode(fileno(stdin), O_BINARY); 310 // ensure output lines end in LF instead of CRLF on windows 311 setmode(fileno(stdout), O_BINARY); 312 setmode(fileno(stderr), O_BINARY); 313 #endif 314 315 // handle any of the help options, if given 316 if (argc > 1 && is_help_option(argv[1])) { 317 puts(info); 318 return 0; 319 } 320 321 slice line; 322 init_slice(&line, 32 * 1024); 323 if (line.ptr == NULL) { 324 fprintf(stderr, line_memory_error_msg); 325 return 1; 326 } 327 328 const int res = run(argc, argv, stdout, &line) == 0 ? 0 : 1; 329 free(line.ptr); 330 return res; 331 }