File: plain.c 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 You can build this command-line app by running 27 28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./plain ./plain.c 29 */ 30 31 #include <ctype.h> 32 #include <stdbool.h> 33 #include <stddef.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <unistd.h> 38 39 #ifdef _WIN32 40 #include <fcntl.h> 41 #include <windows.h> 42 #endif 43 44 #ifdef RED_ERRORS 45 #define ERROR_STYLE "\x1b[38;2;204;0;0m" 46 #ifdef __APPLE__ 47 #define ERROR_STYLE "\x1b[31m" 48 #endif 49 #define RESET_STYLE "\x1b[0m" 50 #else 51 #define ERROR_STYLE 52 #define RESET_STYLE 53 #endif 54 55 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n") 56 57 #define BAD_ALLOC 2 58 59 const char* info = "" 60 "plain [options...] [filepaths...]\n" 61 "\n" 62 "Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n" 63 "be UTF-8. When not given any filepaths, the standard input is used.\n" 64 "\n" 65 "Options, all of which can start with either 1 or 2 dashes:\n" 66 "\n" 67 " -h show this help message\n" 68 " -help show this help message\n" 69 ""; 70 71 // span is a region of bytes in memory 72 typedef struct span { 73 // ptr is the starting place of the region 74 unsigned char* ptr; 75 76 // len is how many bytes are in the region 77 size_t len; 78 } span; 79 80 // advance updates a span so it starts after the number of bytes given 81 static inline void advance(span* src, size_t n) { 82 src->ptr += n; 83 src->len -= n; 84 } 85 86 // slice is a growable region of bytes in memory 87 typedef struct slice { 88 // ptr is the starting place of the region 89 unsigned char* ptr; 90 91 // cap is how many bytes the memory region has available 92 size_t cap; 93 } slice; 94 95 // find_esc_pair tries to find the starting index of either 2-byte substrings 96 // "\x1b[" or "\x1b]" 97 static inline int64_t find_esc_pair(span line, size_t start) { 98 bool esc = false; 99 100 for (size_t i = start; i < line.len; i++) { 101 unsigned char cur = line.ptr[i]; 102 103 if (cur == '\x1b') { 104 esc = true; 105 continue; 106 } 107 108 if (esc && (cur == '[' || cur == ']')) { 109 return i - 1; 110 } 111 112 esc = false; 113 } 114 115 return -1; 116 } 117 118 // find_alpha tries to find the position of the first letter in a string 119 static inline int64_t find_alpha(span line) { 120 for (size_t i = 0; i < line.len; i++) { 121 if (isalpha(line.ptr[i])) { 122 return i; 123 } 124 } 125 return -1; 126 } 127 128 // find_byte tries to find the first position of the value given in a string 129 static inline int64_t find_byte(span line, unsigned char what) { 130 for (size_t i = 0; i < line.len; i++) { 131 if (line.ptr[i] == what) { 132 return i; 133 } 134 } 135 return -1; 136 } 137 138 // find_osc_end tries to find the position where an OSC sequence ends 139 int64_t find_osc_end(span line) { 140 unsigned char prev = 0; 141 142 for (size_t i = 0; i < line.len; i++) { 143 const unsigned char cur = line.ptr[i]; 144 if (prev == '\x1b' && cur == '\\') { 145 return i; 146 } 147 prev = cur; 148 } 149 150 return -1; 151 } 152 153 // destyle_line renders the line given, omitting ANSI-styles 154 void destyle_line(FILE* w, span line) { 155 while (line.len > 0) { 156 int64_t j = find_esc_pair(line, 0); 157 if (j < 0) { 158 fwrite(line.ptr, 1, line.len, w); 159 return; 160 } 161 162 fwrite(line.ptr, 1, j, w); 163 advance(&line, j); 164 165 if (line.ptr[1] == '[') { 166 j = find_alpha(line); 167 if (j < 0) { 168 return; 169 } 170 advance(&line, j + 1); 171 continue; 172 } 173 174 j = find_osc_end(line); 175 if (j < 0) { 176 return; 177 } 178 advance(&line, j + 1); 179 } 180 } 181 182 bool starts_with_bom(span s) { 183 const unsigned char* p = s.ptr; 184 return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf; 185 } 186 187 // handle_lines loops over input lines, restyling all digit-runs as more 188 // readable `nice numbers`, fulfilling the app's purpose 189 void handle_lines(FILE* w, slice* line, FILE* src, bool live_lines) { 190 span trimmed; 191 192 for (size_t i = 0; !feof(w); i++) { 193 ssize_t len = getline((char**)&line->ptr, &line->cap, src); 194 if (line->ptr == NULL) { 195 fprintf(stderr, "\n"); 196 fprintf(stderr, ERROR_LINE("out of memory")); 197 exit(BAD_ALLOC); 198 } 199 200 if (len < 0) { 201 break; 202 } 203 204 trimmed.ptr = line->ptr; 205 trimmed.len = len; 206 207 // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it 208 if (i == 0 && starts_with_bom(trimmed)) { 209 trimmed.ptr += 3; 210 trimmed.len -= 3; 211 len = trimmed.len; 212 } 213 214 const unsigned char* p = trimmed.ptr; 215 // get rid of trailing line-feeds and CRLF end-of-line byte-pairs 216 if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') { 217 trimmed.len -= 2; 218 } else if (len >= 1 && p[len - 1] == '\n') { 219 trimmed.len--; 220 } 221 222 destyle_line(w, trimmed); 223 fputc('\n', w); 224 } 225 226 if (!live_lines) { 227 fflush(w); 228 } 229 } 230 231 // handle_file handles data from the filename given; returns false only when 232 // the file can't be opened 233 bool handle_file(FILE* w, slice* line, const char* path, bool live_lines) { 234 FILE* f = fopen(path, "rb"); 235 if (f == NULL) { 236 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 237 return false; 238 } 239 240 handle_lines(w, line, f, live_lines); 241 fclose(f); 242 return true; 243 } 244 245 // run returns the number of errors 246 int run(char** args, size_t nargs, FILE* w, bool live_lines) { 247 size_t dashes = 0; 248 for (int i = 0; i < nargs; i++) { 249 if (strcmp(args[i], "-") == 0) { 250 dashes++; 251 } 252 } 253 254 if (dashes > 1) { 255 const char* m = "can't use the standard input (dash) more than once"; 256 fprintf(stderr, ERROR_LINE("%s"), m); 257 return 1; 258 } 259 260 size_t errors = 0; 261 262 slice line; 263 line.cap = 32 * 1024; 264 line.ptr = malloc(line.cap); 265 266 if (line.ptr == NULL) { 267 fprintf(stderr, ERROR_LINE("out of memory")); 268 exit(BAD_ALLOC); 269 } 270 271 for (size_t i = 0; i < nargs && !feof(w); i++) { 272 if (strcmp(args[i], "-") == 0) { 273 // `-` means standard input 274 handle_lines(w, &line, stdin, live_lines); 275 continue; 276 } 277 278 if (!handle_file(w, &line, args[i], live_lines)) { 279 errors++; 280 } 281 } 282 283 // use stdin when not given any filepaths 284 if (nargs == 0) { 285 handle_lines(w, &line, stdin, live_lines); 286 } 287 288 if (!live_lines) { 289 fflush(w); 290 } 291 free(line.ptr); 292 return errors; 293 } 294 295 int main(int argc, char** argv) { 296 #ifdef _WIN32 297 setmode(fileno(stdin), O_BINARY); 298 // ensure output lines end in LF instead of CRLF on windows 299 setmode(fileno(stdout), O_BINARY); 300 setmode(fileno(stderr), O_BINARY); 301 #endif 302 303 if (argc > 1) { 304 if ( 305 strcmp(argv[1], "-h") == 0 || 306 strcmp(argv[1], "-help") == 0 || 307 strcmp(argv[1], "--h") == 0 || 308 strcmp(argv[1], "--help") == 0 309 ) { 310 fprintf(stdout, "%s", info); 311 return 0; 312 } 313 } 314 315 size_t nargs = argc - 1; 316 char** args = argv + 1; 317 bool buffered = false; 318 319 if (nargs > 0) { 320 if ( 321 strcmp(args[0], "-buffered") == 0 || 322 strcmp(args[0], "--buffered") == 0 323 ) { 324 buffered = true; 325 nargs--; 326 args++; 327 } 328 } 329 330 if (nargs > 0 && strcmp(args[0], "--") == 0) { 331 nargs--; 332 args++; 333 } 334 335 const int fd = fileno(stdout); 336 const bool live_lines = !buffered && lseek(fd, 0, SEEK_CUR) != 0; 337 if (live_lines) { 338 setvbuf(stdout, NULL, _IOLBF, 0); 339 } else { 340 setvbuf(stdout, NULL, _IOFBF, 0); 341 } 342 return run(args, nargs, stdout, live_lines) == 0 ? 0 : 1; 343 }