File: fixlines.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./fixlines ./fixlines.c
  29 */
  30 
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif
  40 
  41 const char* info = ""
  42 "fixlines [options...] [filepaths...]\n"
  43 "\n"
  44 "This tool fixes lines in UTF-8 text, ignoring leading UTF-8 BOMs, trailing\n"
  45 "carriage-returns on all lines, and ensures no lines across inputs are\n"
  46 "accidentally joined, since all lines it outputs end with line-feeds,\n"
  47 "even when the original files don't. Trailing spaces are also ignored.\n"
  48 "\n"
  49 "The only option available is to show this help message, using any of\n"
  50 "`-h`, `--h`, `-help`, or `--help`, without the quotes.\n"
  51 "";
  52 
  53 const char* no_line_memory_msg = "can't get enough memory to read lines";
  54 
  55 // slice is a growable region of bytes in memory
  56 typedef struct slice {
  57     // ptr is the starting place of the region
  58     unsigned char* ptr;
  59 
  60     // len is how many bytes are currently being used
  61     size_t len;
  62 
  63     // cap is how many bytes the memory region has available
  64     size_t cap;
  65 } slice;
  66 
  67 bool starts_with_bom(const unsigned char* b, const size_t n) {
  68     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
  69 }
  70 
  71 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all
  72 // CR-LF pairs into single LF bytes
  73 bool handle_reader(FILE* w, FILE* r, slice* line) {
  74     slice trimmed;
  75 
  76     for (size_t i = 0; !feof(w); i++) {
  77         ssize_t len = getline((char**)&line->ptr, &line->cap, r);
  78         if (len < 0) {
  79             break;
  80         }
  81 
  82         if (line->ptr == NULL) {
  83             putc('\n', w);
  84             fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
  85             return false;
  86         }
  87 
  88         line->len = len;
  89         trimmed.ptr = line->ptr;
  90         trimmed.len = line->len;
  91 
  92         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
  93         if (i == 0 && starts_with_bom(trimmed.ptr, trimmed.len)) {
  94             trimmed.ptr += 3;
  95             trimmed.len -= 3;
  96             len = trimmed.len;
  97         }
  98 
  99         const unsigned char* p = trimmed.ptr;
 100         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 101         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 102             trimmed.len -= 2;
 103         } else if (len >= 1 && p[len - 1] == '\n') {
 104             trimmed.len--;
 105         }
 106 
 107         fwrite(trimmed.ptr, trimmed.len, 1, w);
 108         putc('\n', w);
 109         fflush(w);
 110     }
 111 
 112     return true;
 113 }
 114 
 115 // handle_file handles data from the filename given; returns false only when
 116 // the file can't be opened
 117 bool handle_file(FILE* w, const char* fname, slice* line) {
 118     FILE* f = fopen(fname, "rb");
 119     if (f == NULL) {
 120         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
 121         return false;
 122     }
 123 
 124     const bool ok = handle_reader(w, f, line);
 125     fclose(f);
 126     return ok;
 127 }
 128 
 129 // run returns the number of errors
 130 int run(int argc, char** argv, FILE* w) {
 131     size_t dashes = 0;
 132     for (int i = 1; i < argc; i++) {
 133         if (argv[i][0] == '-' && argv[i][1] == 0) {
 134             dashes++;
 135         }
 136     }
 137 
 138     if (dashes > 1) {
 139         const char* msg = "can't use the standard input (dash) more than once";
 140         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
 141         return 1;
 142     }
 143 
 144     slice line;
 145     line.len = 0;
 146     line.cap = 32 * 1024;
 147     line.ptr = malloc(line.cap);
 148 
 149     if (line.ptr == NULL) {
 150         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 151         return 1;
 152     }
 153 
 154     size_t errors = 0;
 155     for (int i = 1; i < argc && !feof(stdout) && line.ptr != NULL; i++) {
 156         if (argv[i][0] == '-' && argv[i][1] == 0) {
 157             if (!handle_reader(w, stdin, &line)) {
 158                 errors++;
 159             }
 160             continue;
 161         }
 162 
 163         if (!handle_file(w, argv[i], &line)) {
 164             errors++;
 165         }
 166     }
 167 
 168     // use stdin when not given any filepaths
 169     if (argc <= 1) {
 170         if (!handle_reader(w, stdin, &line)) {
 171             errors++;
 172         }
 173     }
 174 
 175     free(line.ptr);
 176     return errors;
 177 }
 178 
 179 int main(int argc, char** argv) {
 180 #ifdef _WIN32
 181     setmode(fileno(stdin), O_BINARY);
 182     // ensure output lines end in LF instead of CRLF on windows
 183     setmode(fileno(stdout), O_BINARY);
 184     setmode(fileno(stderr), O_BINARY);
 185 #endif
 186 
 187     if (argc > 1) {
 188         if (
 189             strcmp(argv[1], "-h") == 0 ||
 190             strcmp(argv[1], "-help") == 0 ||
 191             strcmp(argv[1], "--h") == 0 ||
 192             strcmp(argv[1], "--help") == 0
 193         ) {
 194             fprintf(stdout, "%s", info);
 195             return 0;
 196         }
 197     }
 198 
 199     return run(argc, argv, stdout) == 0 ? 0 : 1;
 200 }