File: catl.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./catl ./catl.c
  29 */
  30 
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
  36 
  37 #ifdef _WIN32
  38 #include <windows.h>
  39 #endif
  40 
  41 const char* info = ""
  42 "catl [filenames...]\n"
  43 "\n"
  44 "Concatenate lines from all the named sources given. The name `-` stands for\n"
  45 "the standard input. When no names are given, the standard input is used by\n"
  46 "default.\n"
  47 "";
  48 
  49 const char* no_line_memory_msg = "can't get enough memory to read lines";
  50 
  51 // slice is a growable region of bytes in memory
  52 typedef struct slice {
  53     // ptr is the starting place of the region
  54     unsigned char* ptr;
  55 
  56     // len is how many bytes are currently being used
  57     size_t len;
  58 
  59     // cap is how many bytes the memory region has available
  60     size_t cap;
  61 } slice;
  62 
  63 bool starts_with_bom(const unsigned char* b, const size_t n) {
  64     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
  65 }
  66 
  67 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all
  68 // CR-LF pairs into single LF bytes
  69 bool handle_reader(FILE* w, FILE* r, slice* line) {
  70     slice trimmed;
  71 
  72     for (size_t i = 0; !feof(w); i++) {
  73         int len = getline((char**)&line->ptr, &line->cap, r);
  74         if (len < 0) {
  75             break;
  76         }
  77 
  78         if (line->ptr == NULL) {
  79             putc('\n', w);
  80             fflush(w);
  81 
  82             fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
  83             exit(1);
  84         }
  85 
  86         line->len = len;
  87         trimmed.ptr = line->ptr;
  88         trimmed.len = line->len;
  89 
  90         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
  91         if (i == 0 && starts_with_bom(trimmed.ptr, trimmed.len)) {
  92             trimmed.ptr += 3;
  93             trimmed.len -= 3;
  94             len = trimmed.len;
  95         }
  96 
  97         const unsigned char* p = trimmed.ptr;
  98         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
  99         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 100             trimmed.len -= 2;
 101         } else if (len >= 1 && p[len - 1] == '\n') {
 102             trimmed.len--;
 103         }
 104 
 105         fwrite(trimmed.ptr, trimmed.len, 1, w);
 106         putc('\n', w);
 107         fflush(w);
 108     }
 109 
 110     return true;
 111 }
 112 
 113 // handle_file handles data from the filename given; returns false only when
 114 // the file can't be opened
 115 bool handle_file(FILE* w, char* fname, slice* line) {
 116     FILE* f = fopen(fname, "rb");
 117     if (f == NULL) {
 118         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
 119         return false;
 120     }
 121 
 122     const bool ok = handle_reader(w, f, line);
 123     fclose(f);
 124     return ok;
 125 }
 126 
 127 // run returns the number of errors
 128 int run(int argc, char** argv, FILE* w) {
 129     size_t dashes = 0;
 130     for (int i = 1; i < argc; i++) {
 131         if (argv[i][0] == '-' && argv[i][1] == 0) {
 132             dashes++;
 133         }
 134     }
 135 
 136     if (dashes > 1) {
 137         const char* msg = "can't use the standard input (dash) more than once";
 138         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
 139         return 1;
 140     }
 141 
 142     slice line;
 143     line.len = 0;
 144     line.cap = 32 * 1024;
 145     line.ptr = malloc(line.cap);
 146 
 147     if (line.ptr == NULL) {
 148         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 149         return 1;
 150     }
 151 
 152     // use stdin when not given any filepaths
 153     if (argc <= 1) {
 154         handle_reader(w, stdin, &line);
 155         free(line.ptr);
 156         return 0;
 157     }
 158 
 159     size_t errors = 0;
 160     for (int i = 1; i < argc && !feof(stdout); i++) {
 161         if (argv[i][0] == '-' && argv[i][1] == 0) {
 162             if (!handle_reader(w, stdin, &line)) {
 163                 errors++;
 164             }
 165             continue;
 166         }
 167 
 168         if (!handle_file(w, argv[i], &line)) {
 169             errors++;
 170         }
 171     }
 172 
 173     free(line.ptr);
 174     return errors;
 175 }
 176 
 177 int main(int argc, char** argv) {
 178 #ifdef _WIN32
 179     setmode(fileno(stdin), O_BINARY);
 180     // ensure output lines end in LF instead of CRLF on windows
 181     setmode(fileno(stdout), O_BINARY);
 182     setmode(fileno(stderr), O_BINARY);
 183 #endif
 184 
 185     if (argc > 1) {
 186         if (
 187             strcmp(argv[1], "-h") == 0 ||
 188             strcmp(argv[1], "-help") == 0 ||
 189             strcmp(argv[1], "--h") == 0 ||
 190             strcmp(argv[1], "--help") == 0
 191         ) {
 192             fprintf(stdout, "%s", info);
 193             return 0;
 194         }
 195     }
 196 
 197     return run(argc, argv, stdout) == 0 ? 0 : 1;
 198 }