File: detrail.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./detrail ./detrail.c
  29 */
  30 
  31 #include <stdbool.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #include <unistd.h>
  36 
  37 #ifdef _WIN32
  38 #include <fcntl.h>
  39 #include <windows.h>
  40 #endif
  41 
  42 #ifdef RED_ERRORS
  43 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  44 #ifdef __APPLE__
  45 #define ERROR_STYLE "\x1b[31m"
  46 #endif
  47 #define RESET_STYLE "\x1b[0m"
  48 #else
  49 #define ERROR_STYLE
  50 #define RESET_STYLE
  51 #endif
  52 
  53 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  54 
  55 #define BAD_ALLOC 2
  56 
  57 const char* info = ""
  58 "detrail [filenames...]\n"
  59 "\n"
  60 "Ignore trailing spaces and/or carriage-returns from text lines.\n"
  61 "";
  62 
  63 // slice is a growable region of bytes in memory
  64 typedef struct slice {
  65     // ptr is the starting place of the region
  66     unsigned char* ptr;
  67 
  68     // cap is how many bytes the memory region has available
  69     size_t cap;
  70 } slice;
  71 
  72 bool starts_with_bom(const unsigned char* b, const size_t n) {
  73     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
  74 }
  75 
  76 void handle_reader(FILE* w, FILE* r, slice* line, bool live_lines) {
  77     for (size_t i = 0; !feof(w); i++) {
  78         ssize_t len = getline((char**)&line->ptr, &line->cap, r);
  79         if (line->ptr == NULL) {
  80             fprintf(stderr, "\n");
  81             fprintf(stderr, ERROR_LINE("out of memory"));
  82             exit(BAD_ALLOC);
  83         }
  84 
  85         if (len < 0) {
  86             break;
  87         }
  88 
  89         unsigned char* ptr = line->ptr;
  90 
  91         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
  92         if (i == 0 && starts_with_bom(ptr, len)) {
  93             ptr += 3;
  94             len -= 3;
  95         }
  96 
  97         // replace trailing carriage-returns with line-feeds
  98         if (len >= 1 && ptr[len - 1] == '\r') {
  99             ptr[len - 1] = '\n';
 100         }
 101 
 102         // get rid of carriage-returns preceding line-feeds
 103         if (len >= 2 && ptr[len - 2] == '\r' && ptr[len - 1] == '\n') {
 104             ptr[len - 2] = '\n';
 105             len--;
 106         }
 107 
 108         // ignore trailing spaces
 109         while (len > 0 && ptr[len - 1] == ' ') {
 110             len--;
 111         }
 112 
 113         fwrite(ptr, 1, len, w);
 114         if (len < 1 || ptr[len - 1] != '\n') {
 115             fputc('\n', w);
 116         }
 117     }
 118 
 119     if (!live_lines) {
 120         fflush(w);
 121     }
 122 }
 123 
 124 // handle_file handles data from the filename given; returns false only when
 125 // the file can't be opened
 126 bool handle_file(FILE* w, const char* path, slice* line, bool live_lines) {
 127     FILE* f = fopen(path, "rb");
 128     if (f == NULL) {
 129         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 130         return false;
 131     }
 132 
 133     handle_reader(w, f, line, live_lines);
 134     fclose(f);
 135     return true;
 136 }
 137 
 138 // run returns the number of errors
 139 int run(char** args, size_t nargs, FILE* w, bool live_lines) {
 140     size_t dashes = 0;
 141     for (int i = 0; i < nargs; i++) {
 142         if (strcmp(args[i], "-") == 0) {
 143             dashes++;
 144         }
 145     }
 146 
 147     if (dashes > 1) {
 148         const char* m = "can't use the standard input (dash) more than once";
 149         fprintf(stderr, ERROR_LINE("%s"), m);
 150         return 1;
 151     }
 152 
 153     slice line;
 154     line.cap = 32 * 1024;
 155     line.ptr = malloc(line.cap);
 156 
 157     if (line.ptr == NULL) {
 158         fprintf(stderr, ERROR_LINE("out of memory"));
 159         exit(BAD_ALLOC);
 160     }
 161 
 162     size_t errors = 0;
 163 
 164     for (int i = 0; i < nargs && !feof(w); i++) {
 165         if (strcmp(args[i], "-") == 0) {
 166             handle_reader(w, stdin, &line, live_lines);
 167             continue;
 168         }
 169 
 170         if (!handle_file(w, args[i], &line, live_lines)) {
 171             errors++;
 172         }
 173     }
 174 
 175     // use stdin when not given any filepaths
 176     if (nargs == 0) {
 177         handle_reader(w, stdin, &line, live_lines);
 178     }
 179 
 180     free(line.ptr);
 181     return errors;
 182 }
 183 
 184 int main(int argc, char** argv) {
 185 #ifdef _WIN32
 186     setmode(fileno(stdin), O_BINARY);
 187     // ensure output lines end in LF instead of CRLF on windows
 188     setmode(fileno(stdout), O_BINARY);
 189     setmode(fileno(stderr), O_BINARY);
 190 #endif
 191 
 192     if (argc > 1) {
 193         if (
 194             strcmp(argv[1], "-h") == 0 ||
 195             strcmp(argv[1], "-help") == 0 ||
 196             strcmp(argv[1], "--h") == 0 ||
 197             strcmp(argv[1], "--help") == 0
 198         ) {
 199             fprintf(stdout, "%s", info);
 200             return 0;
 201         }
 202     }
 203 
 204     size_t nargs = argc - 1;
 205     char** args = argv + 1;
 206     bool buffered = false;
 207 
 208     if (nargs > 0) {
 209         if (
 210             strcmp(args[0], "-buffered") == 0 ||
 211             strcmp(args[0], "--buffered") == 0
 212         ) {
 213             buffered = true;
 214             nargs--;
 215             args++;
 216         }
 217     }
 218 
 219     if (nargs > 0 && strcmp(args[0], "--") == 0) {
 220         nargs--;
 221         args++;
 222     }
 223 
 224     const int fd = fileno(stdout);
 225     const bool live_lines = !buffered && lseek(fd, 0, SEEK_CUR) != 0;
 226     if (live_lines) {
 227         setvbuf(stdout, NULL, _IOLBF, 0);
 228     } else {
 229         setvbuf(stdout, NULL, _IOFBF, 0);
 230     }
 231     return run(args, nargs, stdout, live_lines) == 0 ? 0 : 1;
 232 }