/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O3 -march=native -mtune=native -flto -o ./plain ./plain.c
*/

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// error messages are optionally styled in red: exactly one definition of
// ERROR_STYLE is picked, since redefining a macro with a different value
// isn't valid C
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE makes a whole (optionally styled) line out of a string literal
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// info is the help message shown by the help options
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
"be UTF-8. When not given any filepaths, the standard input is used.\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"";

// span is a region of bytes in memory
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;

// advance updates a span so it starts after the number of bytes given;
// callers must ensure n doesn't exceed the span's length
static inline void advance(span* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// INITIAL_LINE_CAP is the capacity given to line-buffers which start empty
enum { INITIAL_LINE_CAP = 4096 };

// read_status tells how an attempt to read a line went
typedef enum read_status {
    // READ_GOT_LINE means a line is available in the buffer
    READ_GOT_LINE,

    // READ_END means there's no more input to read
    READ_END,

    // READ_NO_MEMORY means the line-buffer couldn't grow
    READ_NO_MEMORY,
} read_status;

// grow_slice doubles the capacity of the slice given, keeping its contents;
// returns false when out of memory, leaving the slice as it was
static bool grow_slice(slice* s) {
    if (s->cap > SIZE_MAX / 2) {
        return false;
    }

    const size_t cap = s->cap > 0 ? 2 * s->cap : INITIAL_LINE_CAP;
    // never overwrite the original pointer directly, so the old buffer
    // stays valid (and owned by the caller) when reallocation fails
    unsigned char* ptr = realloc(s->ptr, cap);
    if (ptr == NULL) {
        return false;
    }

    s->ptr = ptr;
    s->cap = cap;
    return true;
}

// read_line loads the next line from the input given into a line-buffer,
// keeping the trailing line-feed, if present; it only relies on standard C,
// unlike POSIX-only func getline, and reports out-of-memory explicitly
static read_status read_line(slice* line, FILE* src) {
    if (line->ptr == NULL) {
        line->cap = 0;
    }
    line->len = 0;

    for (;;) {
        const int c = getc(src);
        if (c == EOF) {
            // a last line without a trailing line-feed still counts
            return line->len > 0 ? READ_GOT_LINE : READ_END;
        }

        if (line->len == line->cap && !grow_slice(line)) {
            return READ_NO_MEMORY;
        }

        line->ptr[line->len] = (unsigned char)c;
        line->len++;

        if (c == '\n') {
            return READ_GOT_LINE;
        }
    }
}

// find_esc_pair tries to find the starting index of the first 2-byte
// substring which is either "\x1b[" (start of a CSI sequence) or "\x1b]"
// (start of an OSC sequence), searching from the index given; returns -1
// when there's no such pair
static inline int64_t find_esc_pair(span line, size_t start) {
    for (size_t i = start; i + 1 < line.len; i++) {
        if (line.ptr[i] != '\x1b') {
            continue;
        }

        const unsigned char next = line.ptr[i + 1];
        if (next == '[' || next == ']') {
            return (int64_t)i;
        }
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string;
// returns -1 when there are no letters
static inline int64_t find_alpha(span line) {
    for (size_t i = 0; i < line.len; i++) {
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string;
// returns -1 when the value isn't found
static inline int64_t find_byte(span line, unsigned char what) {
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] == what) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_osc_end tries to find the position of the last byte of the first OSC
// terminator, which is either a BEL byte, or the 2-byte pair "\x1b\\": the
// first position after the OSC bytes is thus 1 more than the result; returns
// -1 when no terminator is found
int64_t find_osc_end(span line) {
    unsigned char prev = 0;
    for (size_t i = 0; i < line.len; i++) {
        const unsigned char cur = line.ptr[i];
        if (cur == '\a') {
            return (int64_t)i;
        }
        if (prev == '\x1b' && cur == '\\') {
            return (int64_t)i;
        }
        prev = cur;
    }
    return -1;
}

// destyle_line renders the line given, omitting both ANSI-styles/CSI
// sequences, which end at their first letter, and OSC sequences, such as
// hyperlinks and window titles; sequences which never end are emitted as
// they are, along with the rest of the line
void destyle_line(FILE* w, span line) {
    while (line.len > 0) {
        int64_t j = find_esc_pair(line, 0);
        if (j < 0) {
            fwrite(line.ptr, 1, line.len, w);
            return;
        }

        // emit the plain-text before the sequence
        fwrite(line.ptr, 1, (size_t)j, w);
        advance(&line, (size_t)j);

        // the line now starts with ESC, followed by either `[` or `]`
        const bool osc = line.ptr[1] == ']';
        j = osc ? find_osc_end(line) : find_alpha(line);
        if (j < 0) {
            fwrite(line.ptr, 1, line.len, w);
            return;
        }

        // skip the whole sequence, including its last byte
        advance(&line, (size_t)j + 1);
    }
}

// starts_with_bom checks if the bytes given start with a UTF-8 byte-order
// mark
bool starts_with_bom(span s) {
    const unsigned char* p = s.ptr;
    return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
}

// handle_lines loops over input lines, emitting each without its ANSI
// codes, and always ending it with a line-feed, fulfilling the app's
// purpose; a leading UTF-8 BOM on the first line is ignored, and so are
// trailing carriage-returns from CRLF byte-pairs; the loop quits early, and
// quietly, when the output can't be written to anymore; returns false only
// when running out of memory
bool handle_lines(FILE* w, slice* line, FILE* src) {
    // check the output for errors: end-of-file only makes sense for inputs
    for (size_t i = 0; !ferror(w); i++) {
        const read_status status = read_line(line, src);
        if (status == READ_NO_MEMORY) {
            fprintf(stderr, ERROR_LINE("out of memory"));
            return false;
        }

        if (status == READ_END) {
            break;
        }

        span trimmed = { line->ptr, line->len };

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (i == 0 && starts_with_bom(trimmed)) {
            advance(&trimmed, 3);
        }

        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        const unsigned char* p = trimmed.ptr;
        const size_t len = trimmed.len;
        if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
            trimmed.len -= 2;
        } else if (len >= 1 && p[len - 1] == '\n') {
            trimmed.len--;
        }

        destyle_line(w, trimmed);
        fputc('\n', w);
        // flush right away, so results show up even with slow/live inputs
        fflush(w);
    }

    return true;
}

// handle_file handles
data from the filename given; returns false only when 222 // the file can't be opened 223 bool handle_file(FILE* w, slice* line, const char* path) { 224 FILE* f = fopen(path, "rb"); 225 if (f == NULL) { 226 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 227 return false; 228 } 229 230 const bool ok = handle_lines(w, line, f); 231 fclose(f); 232 return ok; 233 } 234 235 // run returns the number of errors 236 int run(int argc, char** argv, FILE* w) { 237 size_t errors = 0; 238 239 slice line; 240 line.len = 0; 241 line.cap = 32 * 1024; 242 line.ptr = malloc(line.cap); 243 244 if (line.ptr == NULL) { 245 fprintf(stderr, ERROR_LINE("out of memory")); 246 return 1; 247 } 248 249 for (size_t i = 1; i < (size_t)argc && !feof(w) && line.ptr != NULL; i++) { 250 if (argv[i][0] == '-' && argv[i][1] == 0) { 251 // `-` means standard input 252 if (!handle_lines(w, &line, stdin)) { 253 errors++; 254 } 255 continue; 256 } 257 258 if (!handle_file(w, &line, argv[i])) { 259 errors++; 260 } 261 } 262 263 // use stdin when not given any filepaths 264 if (argc < 2) { 265 if (!handle_lines(w, &line, stdin)) { 266 errors++; 267 } 268 } 269 270 free(line.ptr); 271 return errors; 272 } 273 274 // is_help_option simplifies control-flow for func main 275 bool is_help_option(const char* s) { 276 return (s[0] == '-') && ( 277 strcmp(s, "-h") == 0 || 278 strcmp(s, "-help") == 0 || 279 strcmp(s, "--h") == 0 || 280 strcmp(s, "--help") == 0 281 ); 282 } 283 284 int main(int argc, char** argv) { 285 #ifdef _WIN32 286 setmode(fileno(stdin), O_BINARY); 287 // ensure output lines end in LF instead of CRLF on windows 288 setmode(fileno(stdout), O_BINARY); 289 setmode(fileno(stderr), O_BINARY); 290 #endif 291 292 // handle any of the help options, if given 293 if (argc > 1 && is_help_option(argv[1])) { 294 printf("%s", info); 295 return 0; 296 } 297 298 return run(argc, argv, stdout) == 0 ? 0 : 1; 299 }