/* File: plain.c */

/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -march=native -mtune=native -flto -o ./plain ./plain.c
*/

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// error messages are only styled when RED_ERRORS is defined at build time;
// each macro is defined exactly once per configuration, since redefining a
// macro with a different body isn't valid C
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE turns a string literal into a (possibly styled) full line, by
// relying on the compile-time concatenation of adjacent string literals
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// BAD_ALLOC is the exit code used when running out of memory
#define BAD_ALLOC 2

// info is the help message
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
"be UTF-8. When not given any filepaths, the standard input is used.\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"    -h       show this help message\n"
"    -help    show this help message\n"
"";

// span is a region of bytes in memory
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;

// advance updates a span so it starts after the number of bytes given; the
// count isn't checked, so callers must ensure it doesn't exceed the length
static inline void advance(span* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// find_esc_pair tries to find the starting index of either 2-byte substrings
// "\x1b[" or "\x1b]", searching from the index given; the result is negative
// when neither is found
static inline int64_t find_esc_pair(span line, size_t start) {
    // esc remembers whether the previous byte was an escape
    bool esc = false;

    for (size_t i = start; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];

        if (cur == '\x1b') {
            esc = true;
            continue;
        }

        if (esc && (cur == '[' || cur == ']')) {
            // the pair starts at the escape byte, right before this one
            return (int64_t)i - 1;
        }

        esc = false;
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string; the
// result is negative when there are no letters
static inline int64_t find_alpha(span line) {
    for (size_t i = 0; i < line.len; i++) {
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string;
// the result is negative when the value isn't found
static inline int64_t find_byte(span line, unsigned char what) {
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] == what) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_osc_end
tries to find the position where an OSC sequence ends 140 int64_t find_osc_end(span line) { 141 unsigned char prev = 0; 142 143 for (size_t i = 0; i < line.len; i++) { 144 const unsigned char cur = line.ptr[i]; 145 if (prev == '\x1b' && cur == '\\') { 146 return i; 147 } 148 prev = cur; 149 } 150 151 return -1; 152 } 153 154 // destyle_line renders the line given, omitting ANSI-styles 155 void destyle_line(FILE* w, span line) { 156 while (line.len > 0) { 157 int64_t j = find_esc_pair(line, 0); 158 if (j < 0) { 159 fwrite(line.ptr, 1, line.len, w); 160 return; 161 } 162 163 fwrite(line.ptr, 1, j, w); 164 advance(&line, j); 165 166 if (line.ptr[1] == '[') { 167 j = find_alpha(line); 168 if (j < 0) { 169 return; 170 } 171 advance(&line, j + 1); 172 continue; 173 } 174 175 j = find_osc_end(line); 176 if (j < 0) { 177 return; 178 } 179 advance(&line, j + 1); 180 } 181 } 182 183 bool starts_with_bom(span s) { 184 const unsigned char* p = s.ptr; 185 return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf; 186 } 187 188 // handle_lines loops over input lines, restyling all digit-runs as more 189 // readable `nice numbers`, fulfilling the app's purpose 190 void handle_lines(FILE* w, slice* line, FILE* src, bool live_lines) { 191 span trimmed; 192 193 for (size_t i = 0; !feof(w); i++) { 194 ssize_t len = getline((char**)&line->ptr, &line->cap, src); 195 if (line->ptr == NULL) { 196 fprintf(stderr, "\n"); 197 fprintf(stderr, ERROR_LINE("out of memory")); 198 exit(BAD_ALLOC); 199 } 200 201 if (len < 0) { 202 break; 203 } 204 205 trimmed.ptr = line->ptr; 206 trimmed.len = len; 207 208 // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it 209 if (i == 0 && starts_with_bom(trimmed)) { 210 trimmed.ptr += 3; 211 trimmed.len -= 3; 212 len = trimmed.len; 213 } 214 215 const unsigned char* p = trimmed.ptr; 216 // get rid of trailing line-feeds and CRLF end-of-line byte-pairs 217 if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') { 218 trimmed.len -= 2; 
219 } else if (len >= 1 && p[len - 1] == '\n') { 220 trimmed.len--; 221 } 222 223 destyle_line(w, trimmed); 224 fputc('\n', w); 225 } 226 227 if (!live_lines) { 228 fflush(w); 229 } 230 } 231 232 // handle_file handles data from the filename given; returns false only when 233 // the file can't be opened 234 bool handle_file(FILE* w, slice* line, const char* path, bool live_lines) { 235 FILE* f = fopen(path, "rb"); 236 if (f == NULL) { 237 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 238 return false; 239 } 240 241 handle_lines(w, line, f, live_lines); 242 fclose(f); 243 return true; 244 } 245 246 // run returns the number of errors 247 int run(char** args, size_t nargs, FILE* w, bool live_lines) { 248 size_t dashes = 0; 249 for (int i = 0; i < nargs; i++) { 250 if (strcmp(args[i], "-") == 0) { 251 dashes++; 252 } 253 } 254 255 if (dashes > 1) { 256 const char* m = "can't use the standard input (dash) more than once"; 257 fprintf(stderr, ERROR_LINE("%s"), m); 258 return 1; 259 } 260 261 size_t errors = 0; 262 263 slice line; 264 line.cap = 32 * 1024; 265 line.ptr = malloc(line.cap); 266 267 if (line.ptr == NULL) { 268 fprintf(stderr, ERROR_LINE("out of memory")); 269 exit(BAD_ALLOC); 270 } 271 272 for (size_t i = 0; i < nargs && !feof(w); i++) { 273 if (strcmp(args[i], "-") == 0) { 274 // `-` means standard input 275 handle_lines(w, &line, stdin, live_lines); 276 continue; 277 } 278 279 if (!handle_file(w, &line, args[i], live_lines)) { 280 errors++; 281 } 282 } 283 284 // use stdin when not given any filepaths 285 if (nargs == 0) { 286 handle_lines(w, &line, stdin, live_lines); 287 } 288 289 if (!live_lines) { 290 fflush(w); 291 } 292 free(line.ptr); 293 return errors; 294 } 295 296 int main(int argc, char** argv) { 297 #ifdef _WIN32 298 setmode(fileno(stdin), O_BINARY); 299 // ensure output lines end in LF instead of CRLF on windows 300 setmode(fileno(stdout), O_BINARY); 301 setmode(fileno(stderr), O_BINARY); 302 #endif 303 304 if (argc > 1) { 
305 if ( 306 strcmp(argv[1], "-h") == 0 || 307 strcmp(argv[1], "-help") == 0 || 308 strcmp(argv[1], "--h") == 0 || 309 strcmp(argv[1], "--help") == 0 310 ) { 311 fprintf(stdout, "%s", info); 312 return 0; 313 } 314 } 315 316 size_t nargs = argc - 1; 317 char** args = argv + 1; 318 bool buffered = false; 319 320 if (nargs > 0) { 321 if ( 322 strcmp(args[0], "-b") == 0 || 323 strcmp(args[0], "--b") == 0 || 324 strcmp(args[0], "-buffered") == 0 || 325 strcmp(args[0], "--buffered") == 0 326 ) { 327 buffered = true; 328 nargs--; 329 args++; 330 } 331 } 332 333 if (nargs > 0 && strcmp(args[0], "--") == 0) { 334 nargs--; 335 args++; 336 } 337 338 const int fd = fileno(stdout); 339 const bool live_lines = !buffered && lseek(fd, 0, SEEK_CUR) != 0; 340 if (live_lines) { 341 setvbuf(stdout, NULL, _IOLBF, 0); 342 } else { 343 setvbuf(stdout, NULL, _IOFBF, 0); 344 } 345 return run(args, nargs, stdout, live_lines) == 0 ? 0 : 1; 346 }