/*
The MIT License (MIT)

Copyright © 2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -march=native -mtune=native -flto -o ./plain ./plain.c
*/

// getline, fileno, and lseek are POSIX rather than ISO C: ask for them
// explicitly, so strict builds (such as -std=c11) still see their declarations
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// ERROR_STYLE is defined exactly once per configuration, to avoid redefining
// a macro with a different body
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE wraps a string literal with the error style and a line-feed
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// BAD_ALLOC is the exit code used when running out of memory
#define BAD_ALLOC 2

// info is the help message shown by the help options
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
"be UTF-8. When not given any filepaths, the standard input is used.\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
"    -h       show this help message\n"
"    -help    show this help message\n"
"";

// span is a region of bytes in memory
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;

// advance updates a span so it starts after the number of bytes given; the
// caller must ensure n doesn't exceed the span's length
static inline void advance(span* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// find_esc_pair tries to find the starting index of either 2-byte substrings
// "\x1b[" or "\x1b]", searching from the index given; returns -1 when neither
// is found
static inline int64_t find_esc_pair(span line, size_t start) {
    bool esc = false;

    for (size_t i = start; i < line.len; i++) {
        unsigned char cur = line.ptr[i];

        if (cur == '\x1b') {
            esc = true;
            continue;
        }

        if (esc && (cur == '[' || cur == ']')) {
            // the pair starts at the escape byte right before this one
            return (int64_t)i - 1;
        }

        esc = false;
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string;
// returns -1 when there are no letters
static inline int64_t find_alpha(span line) {
    for (size_t i = 0; i < line.len; i++) {
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string;
// returns -1 when the value isn't found
static inline int64_t find_byte(span line, unsigned char what) {
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] == what) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_osc_end tries to find the position of the last byte of an OSC sequence,
// which ends either with the 2-byte string-terminator "\x1b\\", or with a
// single BEL byte ("\a"); returns -1 when the sequence has no end in the line
int64_t find_osc_end(span line) {
    unsigned char prev = 0;

    for (size_t i = 0; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];
        if (cur == '\a' || (prev == '\x1b' && cur == '\\')) {
            return (int64_t)i;
        }
        prev = cur;
    }

    return -1;
}

// destyle_line renders the line given, omitting ANSI-styles: CSI sequences
// are skipped up to their first letter, and OSC sequences up to their
// terminator; a sequence which never ends discards the rest of the line
void destyle_line(FILE* w, span line) {
    while (line.len > 0) {
        int64_t j = find_esc_pair(line, 0);
        if (j < 0) {
            // no more sequences: the rest of the line is plain-text
            fwrite(line.ptr, 1, line.len, w);
            return;
        }

        // emit the plain-text before the sequence, which is then skipped
        fwrite(line.ptr, 1, (size_t)j, w);
        advance(&line, (size_t)j);

        // the line now starts with a 2-byte pair, so index 1 is always valid
        if (line.ptr[1] == '[') {
            j = find_alpha(line);
        } else {
            j = find_osc_end(line);
        }

        if (j < 0) {
            return;
        }
        advance(&line, (size_t)j + 1);
    }
}

// starts_with_bom checks if the bytes given start with a UTF-8 byte-order mark
bool starts_with_bom(span s) {
    const unsigned char* p = s.ptr;
    return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
}

// handle_lines loops over input lines, emitting each one without its ANSI
// codes and always ending it with a line-feed, fulfilling the app's purpose;
// the loop ends with the input, or as soon as the output stream has failed
void handle_lines(FILE* w, slice* line, FILE* src, bool live_lines) {
    span trimmed;

    // only input operations set end-of-file indicators, so the output is
    // checked for errors instead
    for (size_t i = 0; !ferror(w); i++) {
        errno = 0;
        ssize_t len = getline((char**)&line->ptr, &line->cap, src);

        // when it can't grow its buffer, getline fails while leaving the old
        // buffer in place, so a null-pointer check alone isn't enough
        if (line->ptr == NULL || (len < 0 && errno == ENOMEM)) {
            fprintf(stderr, "\n");
            fprintf(stderr, ERROR_LINE("out of memory"));
            exit(BAD_ALLOC);
        }

        if (len < 0) {
            break;
        }

        trimmed.ptr = line->ptr;
        trimmed.len = (size_t)len;

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (i == 0 && starts_with_bom(trimmed)) {
            advance(&trimmed, 3);
        }

        const unsigned char* p = trimmed.ptr;
        const size_t n = trimmed.len;
        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        if (n >= 2 && p[n - 2] == '\r' && p[n - 1] == '\n') {
            trimmed.len -= 2;
        } else if (n >= 1 && p[n - 1] == '\n') {
            trimmed.len--;
        }

        destyle_line(w, trimmed);
        fputc('\n', w);
        if (live_lines) {
            fflush(w);
        }
    }
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(FILE* w, slice* line, const char* path, bool live_lines) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
        return false;
    }

    handle_lines(w, line, f, live_lines);
    fclose(f);
    return true;
}

// run handles all inputs named in the arguments, or the standard input when
// none are given; returns the number of errors
int run(int argc, char** argv, FILE* w, bool live_lines) {
    int errors = 0;

    // all inputs share the same line-buffer, which getline grows as needed
    slice line;
    line.cap = 32 * 1024;
    line.ptr = malloc(line.cap);

    if (line.ptr == NULL) {
        fprintf(stderr, ERROR_LINE("out of memory"));
        exit(BAD_ALLOC);
    }

    // stop early when the output stream has failed
    for (size_t i = 1; i < (size_t)argc && !ferror(w); i++) {
        if (strcmp(argv[i], "-") == 0) {
            // `-` means standard input
            handle_lines(w, &line, stdin, live_lines);
            continue;
        }

        if (!handle_file(w, &line, argv[i], live_lines)) {
            errors++;
        }
    }

    // use stdin when not given any filepaths
    if (argc < 2) {
        handle_lines(w, &line, stdin, live_lines);
    }

    if (!live_lines) {
        fflush(w);
    }
    free(line.ptr);
    return errors;
}

// is_help_option simplifies control-flow for func main
bool is_help_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// defining PLAIN_NO_MAIN leaves out the entry point, so the funcs above can
// be compiled as part of another program, such as a test harness
#ifndef PLAIN_NO_MAIN
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && is_help_option(argv[1])) {
        printf("%s", info);
        return 0;
    }

    // output which can't seek is flushed after each line; seekable output
    // is fully buffered, whatever its current offset is
    const bool live_lines = lseek(fileno(stdout), 0, SEEK_CUR) < 0;
    if (!live_lines) {
        setvbuf(stdout, NULL, _IOFBF, 0);
    }
    return run(argc, argv, stdout, live_lines) == 0 ? 0 : 1;
}
#endif