/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O3 -march=native -mtune=native -flto -o ./plain ./plain.c
*/

// getline, fileno, lseek, and ssize_t are POSIX names, not ISO C ones: ask
// for them explicitly, so strict modes such as `-std=c11` still declare them
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// ERROR_STYLE is defined exactly once per configuration: defining it twice
// with different values is an incompatible macro redefinition
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE makes a (possibly styled) line out of a string literal, which
// can also be a printf-style format
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// BAD_ALLOC is the exit code used when running out of memory
#define BAD_ALLOC 2

// info is the help message
const char* info = ""
"plain [options...] [filepaths...]\n"
"\n"
"Ignore all ANSI codes, leaving just the plain-text. All input is assumed to\n"
"be UTF-8. When not given any filepaths, the standard input is used.\n"
"\n"
"Options, all of which can start with either 1 or 2 dashes:\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"";

// span is a region of bytes in memory
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;

// advance updates a span so it starts after the number of bytes given;
// callers must ensure n doesn't exceed the span's length
static inline void advance(span* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// find_esc_pair tries to find the starting index of the first 2-byte
// sequence-starter, which is either "\x1b[" (CSI) or "\x1b]" (OSC); the
// search begins at the index given, and the result is -1 on failure
static inline int64_t find_esc_pair(span line, size_t start) {
    bool esc = false;

    for (size_t i = start; i < line.len; i++) {
        unsigned char cur = line.ptr[i];

        if (cur == '\x1b') {
            esc = true;
            continue;
        }

        if (esc && (cur == '[' || cur == ']')) {
            // i > 0 here, since an ESC byte was seen right before
            return (int64_t)i - 1;
        }

        esc = false;
    }

    return -1;
}

// find_alpha tries to find the position of the first letter in a string,
// returning -1 when there isn't any
static inline int64_t find_alpha(span line) {
    for (size_t i = 0; i < line.len; i++) {
        if (isalpha(line.ptr[i])) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_byte tries to find the first position of the value given in a string,
// returning -1 when there isn't any
static inline int64_t find_byte(span line, unsigned char what) {
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] == what) {
            return (int64_t)i;
        }
    }
    return -1;
}

// find_osc_end tries to find the index of the last byte of an OSC terminator,
// which is either a lone BEL byte or the 2-byte pair "\x1b\\"; the result
// is -1 when the span has no terminator
int64_t find_osc_end(span line) {
    unsigned char prev = 0;
    for (size_t i = 0; i < line.len; i++) {
        unsigned char cur = line.ptr[i];
        if (cur == '\a') {
            return (int64_t)i;
        }
        if (prev == '\x1b' && cur == '\\') {
            return (int64_t)i;
        }
        prev = cur;
    }
    return -1;
}

// destyle_line renders the line given, omitting ANSI sequences: CSI ones
// are skipped up to their first letter, OSC ones up to their terminator;
// sequences which never end in the line are emitted verbatim
void destyle_line(FILE* w, span line) {
    while (line.len > 0) {
        int64_t j = find_esc_pair(line, 0);
        if (j < 0) {
            fwrite(line.ptr, 1, line.len, w);
            return;
        }

        // emit the plain-text before the sequence
        fwrite(line.ptr, 1, (size_t)j, w);
        advance(&line, (size_t)j);

        // the line now starts with ESC and either `[` or `]`, so it's safe
        // to check its 2nd byte to tell which kind of sequence this is
        if (line.ptr[1] == ']') {
            j = find_osc_end(line);
        } else {
            j = find_alpha(line);
        }

        if (j < 0) {
            fwrite(line.ptr, 1, line.len, w);
            return;
        }
        advance(&line, (size_t)j + 1);
    }
}

// starts_with_bom checks if the bytes given start with the UTF-8 BOM
bool starts_with_bom(span s) {
    const unsigned char* p = s.ptr;
    return s.len >= 3 && p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf;
}

// handle_lines loops over input lines, emitting each one without its ANSI
// sequences, fulfilling the app's purpose; a leading UTF-8 BOM on the first
// line is dropped, and all output lines end with a single line-feed, even
// when input lines end with CRLF pairs, or when the last one doesn't end
// with a line-feed; the loop quits early when the output has failed
void handle_lines(FILE* w, slice* line, FILE* src, bool live_lines) {
    // output streams never reach `end of file`: check for write errors
    for (size_t i = 0; !ferror(w); i++) {
        // getline only sets errno on failure, so clear it beforehand to
        // tell a failed allocation apart from a plain end of input
        errno = 0;
        ssize_t len = getline((char**)&line->ptr, &line->cap, src);

        if (len < 0) {
            // a failed (re)allocation keeps the old buffer around, so
            // checking for a null pointer isn't enough on its own
            if (line->ptr == NULL || errno == ENOMEM) {
                fprintf(stderr, "\n");
                fprintf(stderr, ERROR_LINE("out of memory"));
                exit(BAD_ALLOC);
            }
            break;
        }

        span trimmed;
        trimmed.ptr = line->ptr;
        trimmed.len = (size_t)len;

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (i == 0 && starts_with_bom(trimmed)) {
            advance(&trimmed, 3);
        }

        const unsigned char* p = trimmed.ptr;
        const size_t n = trimmed.len;
        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        if (n >= 2 && p[n - 2] == '\r' && p[n - 1] == '\n') {
            trimmed.len -= 2;
        } else if (n >= 1 && p[n - 1] == '\n') {
            trimmed.len--;
        }

        destyle_line(w, trimmed);
        fputc('\n', w);
        if (live_lines) {
            fflush(w);
        }
    }
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(FILE* w, slice* line, const char* path, bool live_lines) {
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
        return false;
    }

    handle_lines(w, line, f, live_lines);
    fclose(f);
    return true;
}

// run handles all inputs named in the arguments, or the standard input when
// none are given; it returns the number of files which couldn't be opened
int run(int argc, char** argv, FILE* w, bool live_lines) {
    size_t errors = 0;

    // the line-buffer is shared across all inputs, and grows via getline
    slice line;
    line.cap = 32 * 1024;
    line.ptr = malloc(line.cap);

    if (line.ptr == NULL) {
        fprintf(stderr, ERROR_LINE("out of memory"));
        exit(BAD_ALLOC);
    }

    // stop going through the inputs as soon as the output has failed
    for (size_t i = 1; i < (size_t)argc && !ferror(w); i++) {
        if (argv[i][0] == '-' && argv[i][1] == 0) {
            // `-` means standard input
            handle_lines(w, &line, stdin, live_lines);
            continue;
        }

        if (!handle_file(w, &line, argv[i], live_lines)) {
            errors++;
        }
    }

    // use stdin when not given any filepaths
    if (argc < 2) {
        handle_lines(w, &line, stdin, live_lines);
    }

    // live-lines mode has already flushed after each line
    if (!live_lines) {
        fflush(w);
    }
    free(line.ptr);
    return (int)errors;
}

// is_help_option simplifies control-flow for func main
bool is_help_option(const char* s) {
    return (s[0] == '-') && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// main shows the help message when the first argument asks for it, or
// handles all inputs otherwise; the exit code is 0 only when all files
// given could be opened
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && is_help_option(argv[1])) {
        fputs(info, stdout);
        return 0;
    }

    // lines are flushed one at a time unless lseek reports offset 0 for the
    // standard output; failed seeks (result -1) thus enable live-lines mode
    const bool live_lines = lseek(fileno(stdout), 0, SEEK_CUR) != 0;
    if (!live_lines) {
        // ask for an explicit non-zero size, as some C runtimes reject a
        // size of 0 when asking for a fully-buffered stream
        setvbuf(stdout, NULL, _IOFBF, BUFSIZ);
    }
    return run(argc, argv, stdout, live_lines) == 0 ? 0 : 1;
}