/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./fixlines ./fixlines.c
*/

// getline and ssize_t are POSIX.1-2008 features: ask for them explicitly, so
// strict builds (for example `cc -std=c11`) still get their declarations
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// info is the help message shown by the help options
const char* info = ""
"fixlines [options...] [filepaths...]\n"
"\n"
"This tool fixes lines in UTF-8 text, ignoring leading UTF-8 BOMs, trailing\n"
"carriage-returns on all lines, and ensures no lines across inputs are\n"
"accidentally joined, since all lines it outputs end with line-feeds,\n"
"even when the original files don't. Trailing spaces are also ignored.\n"
"\n"
"The only option available is to show this help message, using any of\n"
"`-h`, `--h`, `-help`, or `--help`, without the quotes.\n"
"";

// no_line_memory_msg is the error shown when the line buffer can't grow
const char* no_line_memory_msg = "can't get enough memory to read lines";

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// starts_with_bom checks if the n bytes at b begin with the 3-byte UTF-8 BOM
// (byte-order mark)
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// trim_line_end returns how many of the n bytes at b are left after ignoring
// a trailing line-feed, a carriage-return right before it, and any spaces
// trailing what remains, matching what the help message promises
static size_t trim_line_end(const unsigned char* b, size_t n) {
    if (n > 0 && b[n - 1] == '\n') {
        n--;
    }

    // also covers a last line ending in a lone carriage-return
    if (n > 0 && b[n - 1] == '\r') {
        n--;
    }

    while (n > 0 && b[n - 1] == ' ') {
        n--;
    }

    return n;
}

// handle_reader copies all lines from r into w: it skips a leading UTF-8 BOM
// (byte-order mark) on the first line, ignores trailing carriage-returns and
// trailing spaces on all lines, and ends each line it emits with a single
// line-feed; the buffer in `line` is reused across calls, and may be moved
// as it grows.
//
// It returns false only when the line buffer can't grow; it stops early, and
// quietly, as soon as the output stream reports an error, since nothing more
// can be written to it.
bool handle_reader(FILE* w, FILE* r, slice* line) {
    // feof is only meaningful for input streams: a failed/closed output is
    // detected via its error indicator instead
    for (size_t i = 0; !ferror(w); i++) {
        // use a real char* for getline, instead of casting the address of
        // an unsigned char* into a char**
        char* buf = (char*)line->ptr;

        errno = 0;
        const ssize_t got = getline(&buf, &line->cap, r);

        // getline may have reallocated the buffer, even when it fails
        line->ptr = (unsigned char*)buf;

        if (got < 0) {
            // running out of memory is reported via errno, not by a null
            // buffer; anything else is treated as the end of the input
            if (errno == ENOMEM) {
                fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
                return false;
            }
            break;
        }

        const unsigned char* start = line->ptr;
        size_t n = (size_t)got;
        line->len = n;

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (i == 0 && starts_with_bom(start, n)) {
            start += 3;
            n -= 3;
        }

        n = trim_line_end(start, n);

        fwrite(start, 1, n, w);
        putc('\n', w);
        // flush after each line, so output is available right away
        fflush(w);
    }

    return true;
}

// handle_file handles data from the filename given; returns false when the
// file can't be opened, or when handling its lines fails
bool handle_file(FILE* w, const char* fname, slice* line) {
    FILE* f = fopen(fname, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
        return false;
    }

    const bool ok = handle_reader(w, f, line);
    fclose(f);
    return ok;
}

// run handles all inputs named in the command-line arguments, where a single
// dash stands for the standard input, which is also used when no names are
// given; it returns the number of errors
int run(int argc, char** argv, FILE* w) {
    size_t dashes = 0;
    for (int i = 1; i < argc; i++) {
        if (argv[i][0] == '-' && argv[i][1] == 0) {
            dashes++;
        }
    }

    // stdin can only be read through once
    if (dashes > 1) {
        const char* msg = "can't use the standard input (dash) more than once";
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
        return 1;
    }

    // the same line buffer is shared across all inputs
    slice line;
    line.len = 0;
    line.cap = 32 * 1024;
    line.ptr = malloc(line.cap);

    if (line.ptr == NULL) {
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
        return 1;
    }

    int errors = 0;
    // stop early when the output can no longer be written to
    for (int i = 1; i < argc && !ferror(w); i++) {
        if (argv[i][0] == '-' && argv[i][1] == 0) {
            if (!handle_reader(w, stdin, &line)) {
                errors++;
            }
            continue;
        }

        if (!handle_file(w, argv[i], &line)) {
            errors++;
        }
    }

    // use stdin when not given any filepaths
    if (argc <= 1) {
        if (!handle_reader(w, stdin, &line)) {
            errors++;
        }
    }

    free(line.ptr);
    return errors;
}

// main shows the help message when the first argument asks for it, or runs
// the tool on all inputs given; the exit code is 0 only when nothing failed
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    if (argc > 1) {
        if (
            strcmp(argv[1], "-h") == 0 ||
            strcmp(argv[1], "-help") == 0 ||
            strcmp(argv[1], "--h") == 0 ||
            strcmp(argv[1], "--help") == 0
        ) {
            fprintf(stdout, "%s", info);
            return 0;
        }
    }

    return run(argc, argv, stdout) == 0 ? 0 : 1;
}