// File: jsons.c

/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O3 -flto -o ./jsons ./jsons.c

To use empty strings for missing trailing cells, you can build it using

cc -Wall -s -O3 -flto -D PURE_JSONS -o ./jsons ./jsons.c
*/

// getline and ssize_t come from POSIX.1-2008, not from ISO C: ask for them
// explicitly, so strict modes such as `-std=c11` still declare them
#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// ERROR_STYLE and RESET_STYLE are the ANSI escape sequences put around error
// messages: both are empty unless the app is built with `-D RED_ERRORS`;
// each macro is defined exactly once per configuration, since redefining a
// macro with a different value is invalid without an #undef in between
#ifdef RED_ERRORS
#ifdef __APPLE__
// NOTE(review): presumably the basic 8-color code is used here because the
// default macOS terminal doesn't handle 24-bit color sequences; confirm
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE makes a string literal out of the string literal given, adding
// the error styles around it, and ending it with a line-feed
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// NULL_TRAILS is on by default, and makes missing trailing cells into JSON
// nulls: building with `-D PURE_JSONS` uses empty strings instead
#ifndef PURE_JSONS
#define NULL_TRAILS
#endif

// info is the help message shown when asking for help via the command line
#ifdef NULL_TRAILS
const char* info = ""
"jsons [filename...]\n"
"\n"
"Turn a TSV table into JSON Strings, which is a JSON array with objects of\n"
"string values. The only non-string values are nulls, which are used for any\n"
"missing trailing fields.\n"
"";
#else
const char* info = ""
"jsons [filename...]\n"
"\n"
"Turn a TSV table into JSON Strings, which is a JSON array with objects of\n"
"string values.\n"
"";
#endif

// span is a region of bytes in memory
typedef struct span {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are in the region
    size_t len;
} span;

// slice is a growable region of bytes in memory
typedef struct slice {
    // ptr is the starting place of the region
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available
    size_t cap;
} slice;

// starts_with_bom checks if the first `n` bytes at `b` start with the 3-byte
// UTF-8 BOM (byte-order mark) sequence EF BB BF; regions shorter than 3 bytes
// never match, and aren't read at all
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// emit_json_item emits JSON strings for the TSV items
given: such items are 105 // strings without line-feeds, carriage-returns, or tabs 106 void emit_json_item(FILE* w, const unsigned char* ptr, size_t len) { 107 fputc('"', w); 108 for (size_t i = 0; i < len; i++) { 109 const unsigned char b = ptr[i]; 110 if (b == '"' || b == '\\') { 111 fputc('\\', w); 112 } 113 fputc(b, w); 114 } 115 fputc('"', w); 116 } 117 118 // emit_json_item_str is like function emit_json_item, but taking c-strings 119 void emit_json_item_str(FILE* w, const char* s) { 120 fputc('"', w); 121 for (; *s != 0; s++) { 122 const unsigned char b = *s; 123 if (b == '"' || b == '\\') { 124 fputc('\\', w); 125 } 126 fputc(b, w); 127 } 128 fputc('"', w); 129 } 130 131 void handle_row(FILE* w, char** keys, size_t num_keys, span line) { 132 size_t start = 0; 133 size_t len = 0; 134 size_t got = 0; 135 136 fputc(' ', w); 137 fputc(' ', w); 138 fputc('{', w); 139 140 for (size_t i = 0; i < line.len; i++) { 141 if (line.ptr[i] == '\t') { 142 if (got > 0) { 143 fprintf(w, ", "); 144 } 145 146 emit_json_item_str(w, keys[got]); 147 fprintf(w, ": "); 148 emit_json_item(w, line.ptr + start, len); 149 150 start = i + 1; 151 len = 0; 152 got++; 153 } else { 154 len++; 155 } 156 } 157 158 if (start < line.len) { 159 if (got > 0) { 160 fprintf(w, ", "); 161 } 162 163 emit_json_item_str(w, keys[got]); 164 fprintf(w, ": "); 165 emit_json_item(w, line.ptr + start, len); 166 got++; 167 } 168 169 for (size_t i = got; i < num_keys; i++) { 170 if (i > 0) { 171 fprintf(w, ", "); 172 } 173 174 emit_json_item_str(w, keys[got]); 175 #ifdef NULL_TRAILS 176 fprintf(w, ": null"); 177 #else 178 fprintf(w, ": \"\""); 179 #endif 180 } 181 182 fputc('}', w); 183 } 184 185 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all 186 // CR-LF pairs into single LF bytes 187 bool handle_reader(FILE* w, FILE* r, slice* line) { 188 span trimmed; 189 char** keys = NULL; 190 size_t num_keys = 0; 191 size_t i = 0; 192 193 for (i = 0; !feof(w); i++) { 194 ssize_t len = 
getline((char**)&line->ptr, &line->cap, r); 195 if (len < 0) { 196 break; 197 } 198 199 if (line->ptr == NULL) { 200 fprintf(stderr, ERROR_LINE("out of memory")); 201 exit(1); 202 } 203 204 line->len = len; 205 trimmed.ptr = line->ptr; 206 trimmed.len = line->len; 207 208 // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it 209 if (i == 0 && starts_with_bom(trimmed.ptr, trimmed.len)) { 210 trimmed.ptr += 3; 211 trimmed.len -= 3; 212 len = trimmed.len; 213 } 214 215 const unsigned char* p = trimmed.ptr; 216 // get rid of trailing line-feeds and CRLF end-of-line byte-pairs 217 if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') { 218 trimmed.len -= 2; 219 } else if (len >= 1 && p[len - 1] == '\n') { 220 trimmed.len--; 221 } 222 223 if (i == 0) { 224 for (size_t j = 0; j < trimmed.len; j++) { 225 if (trimmed.ptr[j] == '\t') { 226 num_keys++; 227 } 228 } 229 num_keys++; 230 231 keys = malloc(sizeof(char*) * num_keys); 232 if (keys == NULL) { 233 fprintf(stderr, ERROR_LINE("out of memory")); 234 exit(1); 235 } 236 237 keys[0] = malloc(trimmed.len + 1); 238 if (keys[0] == NULL) { 239 fprintf(stderr, ERROR_LINE("out of memory")); 240 exit(1); 241 } 242 243 char* copy = keys[0]; 244 memcpy(copy, trimmed.ptr, trimmed.len); 245 copy[trimmed.len] = 0; 246 247 for (size_t j = 0, k = 1; j < trimmed.len; j++) { 248 if (copy[j] == '\t') { 249 copy[j] = 0; 250 keys[k] = copy + j + 1; 251 k++; 252 } 253 } 254 continue; 255 } 256 257 if (i == 1) { 258 fputc('[', w); 259 } else { 260 fputc(',', w); 261 } 262 fputc('\n', w); 263 264 handle_row(w, keys, num_keys, trimmed); 265 } 266 267 if (i > 0) { 268 fprintf(w, "\n]\n"); 269 } else { 270 fprintf(w, "[]\n"); 271 } 272 273 if (keys != NULL) { 274 if (keys[0] != NULL) { 275 free(keys[0]); 276 } 277 free(keys); 278 } 279 280 return true; 281 } 282 283 // handle_file handles data from the filename given; returns false only when 284 // the file can't be opened 285 bool handle_file(FILE* w, const char* path, slice* 
line) { 286 FILE* f = fopen(path, "rb"); 287 if (f == NULL) { 288 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 289 return false; 290 } 291 292 const bool ok = handle_reader(w, f, line); 293 fclose(f); 294 return ok; 295 } 296 297 bool run(FILE* w, const char* path) { 298 slice line; 299 line.len = 0; 300 line.cap = 32 * 1024; 301 line.ptr = malloc(line.cap); 302 303 if (line.ptr == NULL) { 304 fprintf(stderr, ERROR_LINE("out of memory")); 305 return false; 306 } 307 308 // filename `-` means use the standard input 309 if (path[0] == '-' && path[1] == 0) { 310 const bool ok = handle_reader(w, stdin, &line); 311 free(line.ptr); 312 return ok; 313 } 314 315 const bool ok = handle_file(w, path, &line); 316 free(line.ptr); 317 return ok; 318 } 319 320 int main(int argc, char** argv) { 321 #ifdef _WIN32 322 setmode(fileno(stdin), O_BINARY); 323 // ensure output lines end in LF instead of CRLF on windows 324 setmode(fileno(stdout), O_BINARY); 325 setmode(fileno(stderr), O_BINARY); 326 #endif 327 328 if (argc > 1) { 329 if ( 330 strcmp(argv[1], "-h") == 0 || 331 strcmp(argv[1], "-help") == 0 || 332 strcmp(argv[1], "--h") == 0 || 333 strcmp(argv[1], "--help") == 0 334 ) { 335 fprintf(stdout, "%s", info); 336 return 0; 337 } 338 } 339 340 if (argc > 2) { 341 const char* msg = "can't use more than 1 named input"; 342 fprintf(stderr, ERROR_LINE("%s"), msg); 343 return 1; 344 } 345 346 return run(stdout, (argc > 1) ? argv[1] : "-") ? 0 : 1; 347 }