File: catl.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27     cc -Wall -s -O2 -o ./catl ./catl.c
  28 */
  29 
  30 #include <fcntl.h>
  31 #include <stdbool.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 
  36 #ifdef _WIN32
  37 #include <windows.h>
  38 #endif
  39 
  40 // slice is a growable region of bytes in memory
  41 typedef struct slice {
  42     // ptr is the starting place of the region
  43     unsigned char* ptr;
  44 
  45     // len is how many bytes are currently being used
  46     size_t len;
  47 
  48     // cap is how many bytes the memory region has available
  49     size_t cap;
  50 } slice;
  51 
  52 // init_slice is the constructor for type slice
  53 void init_slice(slice* s, size_t cap) {
  54     s->ptr = malloc(cap);
  55     s->len = 0;
  56     s->cap = cap;
  57 }
  58 
  59 bool starts_with_bom(const unsigned char* b, const size_t n) {
  60     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
  61 }
  62 
  63 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all
  64 // CR-LF pairs into single LF bytes
  65 bool handle_reader(FILE* w, FILE* r, slice* line) {
  66     slice trimmed;
  67 
  68     for (size_t i = 0; !feof(w); i++) {
  69         int len = getline((char**)&line->ptr, &line->cap, r);
  70         if (len < 0) {
  71             break;
  72         }
  73 
  74         line->len = len;
  75         trimmed.ptr = line->ptr;
  76         trimmed.len = line->len;
  77 
  78         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
  79         if (i == 0 && starts_with_bom(trimmed.ptr, trimmed.len)) {
  80             trimmed.ptr += 3;
  81         }
  82 
  83         const unsigned char* p = trimmed.ptr;
  84         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
  85         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
  86             trimmed.len -= 2;
  87         } else if (len >= 1 && p[len - 1] == '\n') {
  88             trimmed.len--;
  89         }
  90 
  91         if (fwrite(trimmed.ptr, trimmed.len, 1, w) < 1) {
  92             return true;
  93         }
  94         putc('\n', w);
  95         fflush(w);
  96     }
  97 
  98     fflush(w);
  99     return true;
 100 }
 101 
 102 // handle_file handles data from the filename given; returns false only when
 103 // the file can't be opened
 104 bool handle_file(FILE* w, char* fname, slice* line) {
 105     FILE* f = fopen(fname, "rb");
 106     if (f == NULL) {
 107         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
 108         return false;
 109     }
 110 
 111     const bool ok = handle_reader(w, f, line);
 112     fclose(f);
 113     return ok;
 114 }
 115 
 116 // run returns the number of errors
 117 size_t run(int argc, char** argv, slice* line) {
 118     size_t dashes = 0;
 119     for (int i = 1; i < argc; i++) {
 120         if (argv[i][0] == '-' && argv[i][1] == 0) {
 121             dashes++;
 122         }
 123     }
 124 
 125     if (dashes > 1) {
 126         const char* msg = "can't use a dash (stdin) as input more than once";
 127         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
 128         return 1;
 129     }
 130 
 131     // use stdin when not given any filepaths
 132     if (argc <= 1) {
 133         handle_reader(stdout, stdin, line);
 134         return 0;
 135     }
 136 
 137     size_t errors = 0;
 138     for (int i = 1; i < argc && !feof(stdout); i++) {
 139         if (argv[i][0] == '-' && argv[i][1] == 0) {
 140             if (!handle_reader(stdout, stdin, line)) {
 141                 errors++;
 142             }
 143             continue;
 144         }
 145 
 146         if (!handle_file(stdout, argv[i], line)) {
 147             errors++;
 148         }
 149     }
 150 
 151     return errors;
 152 }
 153 
 154 int main(int argc, char** argv) {
 155 #ifdef _WIN32
 156     setmode(fileno(stdin), O_BINARY);
 157     // ensure output lines end in LF instead of CRLF on windows
 158     setmode(fileno(stdout), O_BINARY);
 159     setmode(fileno(stderr), O_BINARY);
 160 #endif
 161 
 162     // disable automatic stdio buffering, in favor of explicit buffering
 163     setvbuf(stdin, NULL, _IONBF, 0);
 164     setvbuf(stdout, NULL, _IONBF, 0);
 165     setvbuf(stderr, NULL, _IONBF, 0);
 166 
 167     slice line;
 168     init_slice(&line, 16 * 1024);
 169     if (line.ptr == NULL) {
 170         char* msg = "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";
 171         fprintf(stderr, msg);
 172         return 1;
 173     }
 174 
 175     const int res = run(argc, argv, &line) == 0 ? 0 : 1;
 176     free(line.ptr);
 177     return res;
 178 }