File: detsv.cpp
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 g++ -Wall -O2 -s -o detsv detsv.cpp
  29 */
  30 
  31 #include <algorithm>
  32 #include <cstring>
  33 #include <fstream>
  34 #include <iostream>
  35 #include <string>
  36 #include <vector>
  37 
  38 #ifdef RED_ERRORS
  39 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  40 #ifdef __APPLE__
  41 #define ERROR_STYLE "\x1b[31m"
  42 #endif
  43 #define RESET_STYLE "\x1b[0m"
  44 #else
  45 #define ERROR_STYLE ""
  46 #define RESET_STYLE ""
  47 #endif
  48 
  49 using namespace std;
  50 
  51 const string info = ""
  52 "detsv [file...]\n"
  53 "\n"
  54 "Turn TSV tables into JSON data.\n"
  55 "";
  56 
  57 void de_bom(string &s) {
  58     // s.starts_with("\xef\xbb\xbf")
  59     if (s.size() >= 3 && s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') {
  60         s.erase(0, 3);
  61     }
  62 }
  63 
  64 void de_cr(string &s) {
  65     s.erase(remove(s.begin(), s.end(), '\r'), s.end());
  66 }
  67 
  68 void tab_split(const string& line, vector<string>& items) {
  69     size_t start = 0;
  70     size_t end = line.size();
  71 
  72     for (size_t i = 0; i < end; i++) {
  73         if (line[i] == '\t') {
  74             items.push_back(line.substr(start, i - start));
  75             start = i + 1;
  76         }
  77     }
  78 
  79     if (start < end) {
  80         items.push_back(line.substr(start, end - start + 1));
  81     }
  82 }
  83 
  84 void tab_split_view(const string& line, vector<string_view>& items) {
  85     size_t start = 0;
  86     size_t end = line.size();
  87 
  88     for (size_t i = 0; i < end; i++) {
  89         if (line[i] == '\t') {
  90             items.push_back(string_view(line).substr(start, i - start));
  91             start = i + 1;
  92         }
  93     }
  94 
  95     if (start < end) {
  96         items.push_back(string_view(line).substr(start, end - start + 1));
  97     }
  98 }
  99 
 100 void emit_json_string(const string& s) {
 101     cout << '"';
 102     for (auto c : s) {
 103         switch (c) {
 104         case '"':
 105         case '\\':
 106             cout << '\\' << c;
 107             break;
 108         default:
 109             cout << c;
 110             break;
 111         }
 112     }
 113     cout << '"';
 114 }
 115 
 116 void emit_json_string_view(const string_view& s) {
 117     cout << '"';
 118     for (auto c : s) {
 119         switch (c) {
 120         case '"':
 121         case '\\':
 122             cout << '\\' << c;
 123             break;
 124         default:
 125             cout << c;
 126             break;
 127         }
 128     }
 129     cout << '"';
 130 }
 131 
 132 // seems_json_number detects only a subset of valid json numbers for now
 133 bool seems_json_number(const string_view& s) {
 134     size_t dots = 0;
 135     size_t digits = 0;
 136 
 137     for (auto c : s) {
 138         if ('0' <= c && c <= '9') {
 139             digits++;
 140             continue;
 141         }
 142 
 143         if (c == '-' && digits > 0) {
 144             return false;
 145         }
 146 
 147         if (c == '.') {
 148             if (digits == 0 || dots > 0) {
 149                 return false;
 150             }
 151 
 152             dots++;
 153             digits = 0; // effectively demand digits after a dot
 154             continue;
 155         }
 156 
 157         return false;
 158     }
 159 
 160     return digits > 0;
 161 }
 162 
 163 void emit_json_value(const string_view& s) {
 164     // if (s == "") {
 165     //     cout << "null";
 166     //     return;
 167     // }
 168 
 169     // if (s == "null") {
 170     //     cout << s;
 171     //     return;
 172     // }
 173 
 174     // if (s == "true" || s == "false") {
 175     //     cout << s;
 176     //     return;
 177     // }
 178 
 179     // recognize numbers to avoid quoting them
 180     // if (seems_json_number(s)) {
 181     //     cout << s;
 182     //     return;
 183     // }
 184 
 185     emit_json_string_view(s);
 186 }
 187 
 188 // size_t count(string& s, char what) {
 189 //     size_t count = 0;
 190 //     for (auto c : s) {
 191 //         if (c == what) {
 192 //             count++;
 193 //         }
 194 //     }
 195 //     return count;
 196 // }
 197 
 198 bool handle_input(istream& in, string& line) {
 199     if (!getline(in, line)) {
 200         return true;
 201     }
 202     de_bom(line);
 203     de_cr(line);
 204 
 205     vector<string> keys;
 206     keys.reserve(count_if(line.begin(), line.end(), [](char c) {
 207         return c == '\t';
 208     }) + 1);
 209     // keys.reserve(count(line, '\t') + 1);
 210     tab_split(line, keys);
 211     size_t n = keys.size();
 212 
 213     size_t i = 0;
 214     vector<string_view> values;
 215     values.reserve(n);
 216 
 217     for (i = 0; !cout.eof() && getline(in, line); i++) {
 218         de_cr(line);
 219 
 220         if (i == 0) {
 221             cout << '[' << endl;
 222         } else {
 223             cout << ',' << endl;
 224         }
 225 
 226         values.clear();
 227         tab_split_view(line, values);
 228         size_t got = values.size();
 229 
 230         if (got > n) {
 231             cerr << ERROR_STYLE "expected up to " << n << " items, but got ";
 232             cerr << got << " instead" << RESET_STYLE << endl;
 233             return false;
 234         }
 235 
 236         cout << "  {";
 237         for (size_t j = 0; j < got; j++) {
 238             if (j > 0) {
 239                 cout << ", ";
 240             }
 241             emit_json_string(keys[j]);
 242             cout << ": ";
 243             emit_json_value(values[j]);
 244         }
 245         for (size_t j = got; j < n; j++) {
 246             cout << ", ";
 247             emit_json_string(keys[j]);
 248             cout << ": null";
 249         }
 250         cout << '}';
 251     }
 252 
 253     if (i > 0) {
 254         cout << endl;
 255         cout << ']' << endl;
 256     } else {
 257         cout << "[]" << endl;
 258     }
 259     return true;
 260 }
 261 
 262 bool handle_file(const char* path, string& line) {
 263     ifstream f(path);
 264     if (!f.is_open()) {
 265         const auto msg = "can't open file named";
 266         cerr << ERROR_STYLE << msg << " '" << path << "'" << RESET_STYLE << endl;
 267         return false;
 268     }
 269 
 270     return handle_input(f, line);
 271 }
 272 
 273 int main(int argc, char** argv) {
 274     string line;
 275 
 276     if (argc > 1) {
 277         if (
 278             strcmp(argv[1], "-h") == 0 ||
 279             strcmp(argv[1], "-help") == 0 ||
 280             strcmp(argv[1], "--h") == 0 ||
 281             strcmp(argv[1], "--help") == 0
 282         ) {
 283             cout << info;
 284             return 0;
 285         }
 286     }
 287 
 288     if (argc > 2) {
 289         cerr << ERROR_STYLE << "can't use more than 1 input file" << RESET_STYLE << endl;
 290         return 1;
 291     }
 292 
 293     cin.tie(NULL);
 294     ios_base::sync_with_stdio(false);
 295 
 296     if (argc < 2 || strcmp(argv[1], "-") == 0) {
 297         return handle_input(cin, line) ? 0 : 1;
 298     }
 299 
 300     return handle_file(argv[1], line) ? 0 : 1;
 301 }