File: jsons.c 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 You can build this command-line app by running 27 28 cc -Wall -s -O3 -flto -o ./jsons ./jsons.c 29 30 To use null values for missing trailing cells, you can build it using 31 32 cc -Wall -s -O3 -flto -D NULL_TRAILS -o ./jsons ./jsons.c 33 */ 34 35 #include <stdbool.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 #ifdef _WIN32 41 #include <fcntl.h> 42 #include <windows.h> 43 #endif 44 45 #ifdef RED_ERRORS 46 #define ERROR_STYLE "\x1b[38;2;204;0;0m" 47 #ifdef __APPLE__ 48 #define ERROR_STYLE "\x1b[31m" 49 #endif 50 #define RESET_STYLE "\x1b[0m" 51 #else 52 #define ERROR_STYLE 53 #define RESET_STYLE 54 #endif 55 56 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n") 57 58 #define BAD_ALLOC 2 59 60 #ifdef NULL_TRAILS 61 const char* info = "" 62 "jsons [filename...]\n" 63 "\n" 64 "Turn a TSV table into JSON Strings, which is a JSON array with objects of\n" 65 "string values. The only non-string values are nulls, which are used for any\n" 66 "missing trailing fields.\n" 67 ""; 68 #else 69 const char* info = "" 70 "jsons [filename...]\n" 71 "\n" 72 "Turn a TSV table into JSON Strings, which is a JSON array with objects of\n" 73 "string values.\n" 74 ""; 75 #endif 76 77 // span is a region of bytes in memory 78 typedef struct span { 79 // ptr is the starting place of the region 80 unsigned char* ptr; 81 82 // len is how many bytes are in the region 83 size_t len; 84 } span; 85 86 // slice is a growable region of bytes in memory 87 typedef struct slice { 88 // ptr is the starting place of the region 89 unsigned char* ptr; 90 91 // cap is how many bytes the memory region has available 92 size_t cap; 93 } slice; 94 95 bool starts_with_bom(const unsigned char* b, const size_t n) { 96 return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf); 97 } 98 99 // emit_json_item emits JSON strings for the TSV items given: such items are 100 // strings without line-feeds, carriage-returns, or tabs 101 void emit_json_item(FILE* w, const unsigned char* ptr, size_t len) { 102 fputc('"', w); 103 for (size_t i = 0; i < len; i++) { 104 const unsigned char b = ptr[i]; 105 if (b == '"' || b == '\\') { 106 fputc('\\', w); 107 } 108 fputc(b, w); 109 } 110 fputc('"', w); 111 } 112 113 // emit_json_item_str is like function emit_json_item, but taking c-strings 114 void emit_json_item_str(FILE* w, const char* s) { 115 fputc('"', w); 116 for (; *s != 0; s++) { 117 const unsigned char b = *s; 118 if (b == '"' || b == '\\') { 119 fputc('\\', w); 120 } 121 fputc(b, w); 122 } 123 fputc('"', w); 124 } 125 126 void handle_row(FILE* w, char** keys, size_t num_keys, span line) { 127 size_t start = 0; 128 size_t len = 0; 129 size_t got = 0; 130 131 fputc(' ', w); 132 fputc(' ', w); 133 fputc('{', w); 134 135 for (size_t i = 0; i < line.len; i++) { 136 if (line.ptr[i] == '\t') { 137 if (got > 0) { 138 fprintf(w, ", "); 139 } 140 141 emit_json_item_str(w, keys[got]); 142 fprintf(w, ": "); 143 emit_json_item(w, line.ptr + start, len); 144 145 start = i + 1; 146 len = 0; 147 got++; 148 } else { 149 len++; 150 } 151 } 152 153 if (start < line.len) { 154 if (got > 0) { 155 fprintf(w, ", "); 156 } 157 158 emit_json_item_str(w, keys[got]); 159 fprintf(w, ": "); 160 emit_json_item(w, line.ptr + start, len); 161 got++; 162 } 163 164 for (size_t i = got; i < num_keys; i++) { 165 if (i > 0) { 166 fprintf(w, ", "); 167 } 168 169 emit_json_item_str(w, keys[got]); 170 #ifdef NULL_TRAILS 171 fprintf(w, ": null"); 172 #else 173 fprintf(w, ": \"\""); 174 #endif 175 } 176 177 fputc('}', w); 178 } 179 180 void handle_reader(FILE* w, FILE* r, slice* line) { 181 char** keys = NULL; 182 size_t num_keys = 0; 183 size_t i = 0; 184 185 for (i = 0; !feof(w); i++) { 186 ssize_t len = getline((char**)&line->ptr, &line->cap, r); 187 if (line->ptr == NULL) { 188 fprintf(stderr, "\n"); 189 fprintf(stderr, ERROR_LINE("out of memory")); 190 exit(BAD_ALLOC); 191 } 192 193 if (len < 0) { 194 break; 195 } 196 197 unsigned char* ptr = line->ptr; 198 199 // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it 200 if (i == 0 && starts_with_bom(ptr, len)) { 201 ptr += 3; 202 len -= 3; 203 } 204 205 // get rid of trailing line-feeds and CRLF end-of-line byte-pairs 206 if (len >= 2 && ptr[len - 2] == '\r' && ptr[len - 1] == '\n') { 207 len -= 2; 208 } else if (len >= 1 && ptr[len - 1] == '\n') { 209 len--; 210 } 211 212 if (i == 0) { 213 for (size_t j = 0; j < len; j++) { 214 if (ptr[j] == '\t') { 215 num_keys++; 216 } 217 } 218 num_keys++; 219 220 keys = malloc(sizeof(char*) * num_keys); 221 if (keys == NULL) { 222 fprintf(stderr, "\n"); 223 fprintf(stderr, ERROR_LINE("out of memory")); 224 exit(BAD_ALLOC); 225 } 226 227 keys[0] = malloc(len + 1); 228 if (keys[0] == NULL) { 229 fprintf(stderr, "\n"); 230 fprintf(stderr, ERROR_LINE("out of memory")); 231 exit(BAD_ALLOC); 232 } 233 234 char* copy = keys[0]; 235 memcpy(copy, ptr, len); 236 copy[len] = 0; 237 238 for (size_t j = 0, k = 1; j < len; j++) { 239 if (copy[j] == '\t') { 240 copy[j] = 0; 241 keys[k] = copy + j + 1; 242 k++; 243 } 244 } 245 continue; 246 } 247 248 if (i == 1) { 249 fputc('[', w); 250 } else { 251 fputc(',', w); 252 } 253 fputc('\n', w); 254 255 span s; 256 s.ptr = ptr; 257 s.len = len; 258 handle_row(w, keys, num_keys, s); 259 } 260 261 if (i > 0) { 262 fprintf(w, "\n]\n"); 263 } else { 264 fprintf(w, "[]\n"); 265 } 266 267 if (keys != NULL) { 268 if (keys[0] != NULL) { 269 free(keys[0]); 270 } 271 free(keys); 272 } 273 } 274 275 // handle_file handles data from the filename given; returns false only when 276 // the file can't be opened 277 bool handle_file(FILE* w, const char* path, slice* line) { 278 FILE* f = fopen(path, "rb"); 279 if (f == NULL) { 280 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 281 return false; 282 } 283 284 handle_reader(w, f, line); 285 fclose(f); 286 return true; 287 } 288 289 bool run(FILE* w, const char* path) { 290 slice line; 291 line.cap = 32 * 1024; 292 line.ptr = malloc(line.cap); 293 294 if (line.ptr == NULL) { 295 fprintf(stderr, "\n"); 296 fprintf(stderr, ERROR_LINE("out of memory")); 297 exit(BAD_ALLOC); 298 } 299 300 // filename `-` means use the standard input 301 if (strcmp(path, "-") == 0) { 302 handle_reader(w, stdin, &line); 303 free(line.ptr); 304 return true; 305 } 306 307 const bool ok = handle_file(w, path, &line); 308 free(line.ptr); 309 return ok; 310 } 311 312 int main(int argc, char** argv) { 313 #ifdef _WIN32 314 setmode(fileno(stdin), O_BINARY); 315 // ensure output lines end in LF instead of CRLF on windows 316 setmode(fileno(stdout), O_BINARY); 317 setmode(fileno(stderr), O_BINARY); 318 #endif 319 320 if (argc > 1) { 321 if ( 322 strcmp(argv[1], "-h") == 0 || 323 strcmp(argv[1], "-help") == 0 || 324 strcmp(argv[1], "--h") == 0 || 325 strcmp(argv[1], "--help") == 0 326 ) { 327 fprintf(stdout, "%s", info); 328 return 0; 329 } 330 } 331 332 if (argc > 2) { 333 const char* msg = "can't use more than 1 named input"; 334 fprintf(stderr, ERROR_LINE("%s"), msg); 335 return 1; 336 } 337 338 return run(stdout, (argc > 1) ? argv[1] : "-") ? 0 : 1; 339 }