File: catl.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./catl ./catl.c
  29 */
  30 
  31 #include <fcntl.h>
  32 #include <stdbool.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 
  37 #ifdef _WIN32
  38 #include <windows.h>
  39 #endif
  40 
  41 const char* line_memory_error_msg =
  42     ""
  43     "\x1b[31mcan't get memory for the line-scanner\x1b[0m\n";
  44 
  45 // slice is a growable region of bytes in memory
  46 typedef struct slice {
  47     // ptr is the starting place of the region
  48     unsigned char* ptr;
  49 
  50     // len is how many bytes are currently being used
  51     size_t len;
  52 
  53     // cap is how many bytes the memory region has available
  54     size_t cap;
  55 } slice;
  56 
  57 // init_slice is the constructor for type slice
  58 void init_slice(slice* s, size_t cap) {
  59     s->ptr = malloc(cap);
  60     s->len = 0;
  61     s->cap = cap;
  62 }
  63 
  64 bool starts_with_bom(const unsigned char* b, const size_t n) {
  65     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
  66 }
  67 
  68 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all
  69 // CR-LF pairs into single LF bytes
  70 bool handle_reader(FILE* w, FILE* r, slice* line) {
  71     slice trimmed;
  72 
  73     for (size_t i = 0; !feof(w); i++) {
  74         int len = getline((char**)&line->ptr, &line->cap, r);
  75         if (len < 0) {
  76             break;
  77         }
  78 
  79         if (line->ptr == NULL) {
  80             putc('\n', w);
  81             fprintf(stderr, line_memory_error_msg);
  82             exit(1);
  83         }
  84 
  85         line->len = len;
  86         trimmed.ptr = line->ptr;
  87         trimmed.len = line->len;
  88 
  89         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
  90         if (i == 0 && starts_with_bom(trimmed.ptr, trimmed.len)) {
  91             trimmed.ptr += 3;
  92             trimmed.len -= 3;
  93             len = trimmed.len;
  94         }
  95 
  96         const unsigned char* p = trimmed.ptr;
  97         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
  98         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
  99             trimmed.len -= 2;
 100         } else if (len >= 1 && p[len - 1] == '\n') {
 101             trimmed.len--;
 102         }
 103 
 104         fwrite(trimmed.ptr, trimmed.len, 1, w);
 105         putc('\n', w);
 106     }
 107 
 108     return true;
 109 }
 110 
 111 // handle_file handles data from the filename given; returns false only when
 112 // the file can't be opened
 113 bool handle_file(FILE* w, char* fname, slice* line) {
 114     FILE* f = fopen(fname, "rb");
 115     if (f == NULL) {
 116         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
 117         return false;
 118     }
 119 
 120     const bool ok = handle_reader(w, f, line);
 121     fclose(f);
 122     return ok;
 123 }
 124 
 125 // run returns the number of errors
 126 int run(int argc, char** argv, FILE* w, slice* line) {
 127     size_t dashes = 0;
 128     for (int i = 1; i < argc; i++) {
 129         if (argv[i][0] == '-' && argv[i][1] == 0) {
 130             dashes++;
 131         }
 132     }
 133 
 134     if (dashes > 1) {
 135         const char* msg = "can't use a dash (stdin) as input more than once";
 136         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
 137         return 1;
 138     }
 139 
 140     // use stdin when not given any filepaths
 141     if (argc <= 1) {
 142         handle_reader(w, stdin, line);
 143         return 0;
 144     }
 145 
 146     size_t errors = 0;
 147     for (int i = 1; i < argc && !feof(stdout); i++) {
 148         if (argv[i][0] == '-' && argv[i][1] == 0) {
 149             if (!handle_reader(w, stdin, line)) {
 150                 errors++;
 151             }
 152             continue;
 153         }
 154 
 155         if (!handle_file(w, argv[i], line)) {
 156             errors++;
 157         }
 158     }
 159 
 160     return errors;
 161 }
 162 
 163 int main(int argc, char** argv) {
 164 #ifdef _WIN32
 165     setmode(fileno(stdin), O_BINARY);
 166     // ensure output lines end in LF instead of CRLF on windows
 167     setmode(fileno(stdout), O_BINARY);
 168     setmode(fileno(stderr), O_BINARY);
 169 #endif
 170 
 171     slice line;
 172     init_slice(&line, 32 * 1024);
 173     if (line.ptr == NULL) {
 174         fprintf(stderr, line_memory_error_msg);
 175         return 1;
 176     }
 177 
 178     const int res = run(argc, argv, stdout, &line) == 0 ? 0 : 1;
 179     free(line.ptr);
 180     return res;
 181 }