/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./jsons ./jsons.c

To use empty strings for missing trailing cells, you can build it using

cc -Wall -s -O2 -D PURE_JSONS -o ./jsons ./jsons.c
*/

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

#ifndef PURE_JSONS
#define NULL_TRAILS
#endif

#ifdef NULL_TRAILS
const char* info = ""
"jsons [filename...]\n"
"\n"
"Turn a TSV table into JSON Strings, which is a JSON array with objects of\n"
"string values. The only non-string values are nulls, which are used for any\n"
"missing trailing fields.\n"
"";
#else
const char* info = ""
"jsons [filename...]\n"
"\n"
"Turn a TSV table into JSON Strings, which is a JSON array with objects of\n"
"string values.\n"
"";
#endif

const char* no_line_memory_msg = "can't get enough memory to read lines";

// span is a region of bytes in memory
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// starts_with_bom checks if the n bytes at b begin with the 3-byte UTF-8 BOM
// (byte-order mark)
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// emit_json_item emits the len bytes starting at ptr as a double-quoted JSON
// string: double-quotes and backslashes are backslash-escaped, and all bytes
// below 0x20 are emitted as \u00XX escapes, since JSON strings can't have
// raw control characters in them; all other bytes are copied as given
void emit_json_item(FILE* w, const unsigned char* ptr, size_t len) {
    putc('"', w);
    for (size_t i = 0; i < len; i++) {
        const unsigned char b = ptr[i];
        if (b < 0x20) {
            fprintf(w, "\\u%04x", (unsigned int)b);
            continue;
        }
        if (b == '"' || b == '\\') {
            putc('\\', w);
        }
        putc(b, w);
    }
    putc('"', w);
}

// emit_json_item_str is like function emit_json_item, but taking c-strings
void emit_json_item_str(FILE* w, const char* s) {
    emit_json_item(w, (const unsigned char*)s, strlen(s));
}

// emit_key emits a key followed by its colon, putting a comma before it for
// all keys except the first one in an object
static void emit_key(FILE* w, const char* key, bool first) {
    if (!first) {
        fprintf(w, ", ");
    }
    emit_json_item_str(w, key);
    fprintf(w, ": ");
}

// handle_row emits an indented JSON object for the tab-separated line given,
// pairing each cell with the key at the same position.
//
// Rows with fewer cells than keys get the rest of their keys filled with
// nulls, or with empty strings when built with PURE_JSONS. An empty last cell
// (including that of an empty line) counts as a missing one. Cells beyond the
// last key are ignored, since there's no name to pair them with.
void handle_row(FILE* w, char** keys, size_t num_keys, span line) {
    size_t start = 0;
    size_t got = 0;

    putc(' ', w);
    putc(' ', w);
    putc('{', w);

    // emit all cells which end with a tab
    for (size_t i = 0; i < line.len; i++) {
        if (line.ptr[i] != '\t') {
            continue;
        }

        // never index the keys beyond their end
        if (got < num_keys) {
            emit_key(w, keys[got], got == 0);
            emit_json_item(w, line.ptr + start, i - start);
            got++;
        }
        start = i + 1;
    }

    // emit the last cell, unless it's empty
    if (start < line.len && got < num_keys) {
        emit_key(w, keys[got], got == 0);
        emit_json_item(w, line.ptr + start, line.len - start);
        got++;
    }

    // fill all keys the row had no cells for
    for (size_t i = got; i < num_keys; i++) {
        emit_key(w, keys[i], i == 0);
#ifdef NULL_TRAILS
        fprintf(w, "null");
#else
        fprintf(w, "\"\"");
#endif
    }

    putc('}', w);
}

// show_error ends the current output line on w, then shows the message given
// on the standard error, using ANSI codes to style it in red
void show_error(FILE* w, const char* msg) {
    putc('\n', w);
    fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
}

// read_line loads the next line from r into the slice given, keeping its
// trailing line-feed, when present; the slice's buffer grows as needed, and
// running out of memory quits the app. Only standard-C I/O is used, instead
// of POSIX-only functions such as getline. Returns false when there was
// nothing left to read.
static bool read_line(FILE* w, FILE* r, slice* line) {
    const size_t min_cap = 4096;

    // an empty buffer has no capacity, whatever the slice may claim
    if (line->ptr == NULL) {
        line->cap = 0;
    }
    line->len = 0;

    for (;;) {
        const int c = getc(r);
        if (c == EOF) {
            break;
        }

        if (line->len == line->cap) {
            if (line->cap > SIZE_MAX / 2) {
                show_error(w, no_line_memory_msg);
                exit(1);
            }

            size_t bigger = 2 * line->cap;
            if (bigger < min_cap) {
                bigger = min_cap;
            }

            unsigned char* grown = realloc(line->ptr, bigger);
            if (grown == NULL) {
                show_error(w, no_line_memory_msg);
                exit(1);
            }
            line->ptr = grown;
            line->cap = bigger;
        }

        line->ptr[line->len] = (unsigned char)c;
        line->len++;

        if (c == '\n') {
            break;
        }
    }

    return line->len > 0;
}

// split_header turns the tab-separated header line given into an array of
// c-strings, storing the array's length in num_keys. All keys share a single
// copy of the line, which the first key points to: freeing the first key and
// then the array itself releases everything. Running out of memory quits
// the app.
static char** split_header(FILE* w, span header, size_t* num_keys) {
    size_t count = 1;
    for (size_t i = 0; i < header.len; i++) {
        if (header.ptr[i] == '\t') {
            count++;
        }
    }

    char** keys = calloc(count, sizeof *keys);
    char* copy = malloc(header.len + 1);
    if (keys == NULL || copy == NULL) {
        show_error(w, no_line_memory_msg);
        exit(1);
    }

    memcpy(copy, header.ptr, header.len);
    copy[header.len] = 0;

    // end each key where its tab was, so each one is a proper c-string
    keys[0] = copy;
    for (size_t i = 0, k = 1; i < header.len; i++) {
        if (copy[i] == '\t') {
            copy[i] = 0;
            keys[k] = copy + i + 1;
            k++;
        }
    }

    *num_keys = count;
    return keys;
}

// handle_reader turns the TSV lines from r into a JSON array of objects,
// which is written to w: the first line has the keys for all objects, and
// each later line becomes an object. A UTF-8 BOM (byte-order mark) leading
// the first line is ignored, and so are the LF or CRLF ending each line.
// Inputs with no lines after the first one result in an empty array. The
// slice given is the line buffer, and may be grown. Always returns true.
bool handle_reader(FILE* w, FILE* r, slice* line) {
    char** keys = NULL;
    size_t num_keys = 0;
    size_t rows = 0;
    bool first_line = true;

    while (read_line(w, r, line)) {
        span trimmed;
        trimmed.ptr = line->ptr;
        trimmed.len = line->len;

        // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
        if (first_line && starts_with_bom(trimmed.ptr, trimmed.len)) {
            trimmed.ptr += 3;
            trimmed.len -= 3;
        }

        // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
        const unsigned char* p = trimmed.ptr;
        const size_t len = trimmed.len;
        if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
            trimmed.len -= 2;
        } else if (len >= 1 && p[len - 1] == '\n') {
            trimmed.len--;
        }

        if (first_line) {
            keys = split_header(w, trimmed, &num_keys);
            first_line = false;
            continue;
        }

        // the array is opened only when its first row shows up
        putc((rows == 0) ? '[' : ',', w);
        putc('\n', w);
        fflush(w);

        handle_row(w, keys, num_keys, trimmed);
        rows++;
    }

    if (rows > 0) {
        fprintf(w, "\n]\n");
    } else {
        fprintf(w, "[]\n");
    }

    if (keys != NULL) {
        // the first key owns the memory all keys live in
        free(keys[0]);
        free(keys);
    }

    fflush(w);
    return true;
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(FILE* w, const char* fname, slice* line) {
    FILE* f = fopen(fname, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
        return false;
    }

    const bool ok = handle_reader(w, f, line);
    fclose(f);
    return ok;
}

// run converts the input named into JSON written to w, using the standard
// input when the name is a single dash; returns false when the line buffer
// can't be allocated, or when the file named can't be opened
bool run(FILE* w, const char* fname) {
    slice line;
    line.len = 0;
    line.cap = 32 * 1024;
    line.ptr = malloc(line.cap);

    if (line.ptr == NULL) {
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
        return false;
    }

    // filename `-` means use the standard input
    const bool use_stdin = strcmp(fname, "-") == 0;
    const bool ok = use_stdin ?
        handle_reader(w, stdin, &line) : handle_file(w, fname, &line);

    free(line.ptr);
    return ok;
}

int main(int argc, char** argv) { 317 #ifdef _WIN32 318 setmode(fileno(stdin), O_BINARY); 319 // ensure output lines end in LF instead of CRLF on windows 320 setmode(fileno(stdout), O_BINARY); 321 setmode(fileno(stderr), O_BINARY); 322 #endif 323 324 if (argc > 1) { 325 if ( 326 strcmp(argv[1], "-h") == 0 || 327 strcmp(argv[1], "-help") == 0 || 328 strcmp(argv[1], "--h") == 0 || 329 strcmp(argv[1], "--help") == 0 330 ) { 331 fprintf(stdout, "%s", info); 332 return 0; 333 } 334 } 335 336 if (argc > 2) { 337 const char* msg = "can't use more than 1 named input"; 338 fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg); 339 return 1; 340 } 341 342 return run(stdout, (argc > 1) ? argv[1] : "-") ? 0 : 1; 343 }