File: detsv.cpp
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 g++ -Wall -O2 -s -o detsv detsv.cpp
  29 */
  30 
  31 #include <algorithm>
  32 #include <cstring>
  33 #include <fstream>
  34 #include <iostream>
  35 #include <string>
  36 #include <vector>
  37 
  38 #ifdef RED_ERRORS
  39 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  40 #ifdef __APPLE__
  41 #define ERROR_STYLE "\x1b[31m"
  42 #endif
  43 #define RESET_STYLE "\x1b[0m"
  44 #else
  45 #define ERROR_STYLE ""
  46 #define RESET_STYLE ""
  47 #endif
  48 
  49 using namespace std;
  50 
  51 const string info = ""
  52 "detsv [file...]\n"
  53 "\n"
  54 "Turn TSV tables into JSON data.\n"
  55 "";
  56 
  57 void de_bom(string &s) {
  58     // s.starts_with("\xef\xbb\xbf")
  59     if (s.size() >= 3 && s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') {
  60         s.erase(0, 3);
  61     }
  62 }
  63 
  64 void de_cr(string &s) {
  65     s.erase(remove(s.begin(), s.end(), '\r'), s.end());
  66 }
  67 
  68 void tab_split(const string& line, vector<string>& items) {
  69     size_t start = 0;
  70     size_t end = line.size();
  71 
  72     for (size_t i = 0; i < end; i++) {
  73         if (line[i] == '\t') {
  74             items.push_back(line.substr(start, i - start));
  75             start = i + 1;
  76         }
  77     }
  78 
  79     if (start < end) {
  80         items.push_back(line.substr(start, end - start + 1));
  81     }
  82 }
  83 
  84 void tab_split_view(const string& line, vector<string_view>& items) {
  85     size_t start = 0;
  86     size_t end = line.size();
  87 
  88     for (size_t i = 0; i < end; i++) {
  89         if (line[i] == '\t') {
  90             items.push_back(string_view(line).substr(start, i - start));
  91             start = i + 1;
  92         }
  93     }
  94 
  95     if (start < end) {
  96         items.push_back(string_view(line).substr(start, end - start + 1));
  97     }
  98 }
  99 
 100 void emit_json_string(const string& s) {
 101     cout << '"';
 102     for (auto c : s) {
 103         switch (c) {
 104             case '"':
 105             case '\\':
 106                 cout << '\\' << c;
 107                 break;
 108 
 109             default:
 110                 cout << c;
 111                 break;
 112         }
 113     }
 114     cout << '"';
 115 }
 116 
 117 void emit_json_string_view(const string_view& s) {
 118     cout << '"';
 119     for (auto c : s) {
 120         switch (c) {
 121             case '"':
 122             case '\\':
 123                 cout << '\\' << c;
 124                 break;
 125 
 126             default:
 127                 cout << c;
 128                 break;
 129         }
 130     }
 131     cout << '"';
 132 }
 133 
 134 // seems_json_number detects only a subset of valid json numbers for now
 135 bool seems_json_number(const string_view& s) {
 136     size_t dots = 0;
 137     size_t digits = 0;
 138 
 139     for (auto c : s) {
 140         if ('0' <= c && c <= '9') {
 141             digits++;
 142             continue;
 143         }
 144 
 145         if (c == '-' && digits > 0) {
 146             return false;
 147         }
 148 
 149         if (c == '.') {
 150             if (digits == 0 || dots > 0) {
 151                 return false;
 152             }
 153 
 154             dots++;
 155             digits = 0; // effectively demand digits after a dot
 156             continue;
 157         }
 158 
 159         return false;
 160     }
 161 
 162     return digits > 0;
 163 }
 164 
 165 void emit_json_value(const string_view& s) {
 166     // if (s == "") {
 167     //     cout << "null";
 168     //     return;
 169     // }
 170 
 171     // if (s == "null") {
 172     //     cout << s;
 173     //     return;
 174     // }
 175 
 176     // if (s == "true" || s == "false") {
 177     //     cout << s;
 178     //     return;
 179     // }
 180 
 181     // recognize numbers to avoid quoting them
 182     // if (seems_json_number(s)) {
 183     //     cout << s;
 184     //     return;
 185     // }
 186 
 187     emit_json_string_view(s);
 188 }
 189 
 190 // size_t count(string& s, char what) {
 191 //     size_t count = 0;
 192 //     for (auto c : s) {
 193 //         if (c == what) {
 194 //             count++;
 195 //         }
 196 //     }
 197 //     return count;
 198 // }
 199 
 200 bool handle_input(istream& in, string& line) {
 201     if (!getline(in, line)) {
 202         return true;
 203     }
 204     de_bom(line);
 205     de_cr(line);
 206 
 207     vector<string> keys;
 208     keys.reserve(count_if(line.begin(), line.end(), [](char c) {
 209         return c == '\t';
 210     }) + 1);
 211     // keys.reserve(count(line, '\t') + 1);
 212     tab_split(line, keys);
 213     size_t n = keys.size();
 214 
 215     size_t i = 0;
 216     vector<string_view> values;
 217     values.reserve(n);
 218 
 219     for (i = 0; !cout.eof() && getline(in, line); i++) {
 220         de_cr(line);
 221 
 222         if (i == 0) {
 223             cout << '[' << endl;
 224         } else {
 225             cout << ',' << endl;
 226         }
 227 
 228         values.clear();
 229         tab_split_view(line, values);
 230         size_t got = values.size();
 231 
 232         if (got > n) {
 233             cerr << ERROR_STYLE "expected up to " << n << " items, but got ";
 234             cerr << got << " instead" << RESET_STYLE << endl;
 235             return false;
 236         }
 237 
 238         cout << "  {";
 239         for (size_t j = 0; j < got; j++) {
 240             if (j > 0) {
 241                 cout << ", ";
 242             }
 243             emit_json_string(keys[j]);
 244             cout << ": ";
 245             emit_json_value(values[j]);
 246         }
 247         for (size_t j = got; j < n; j++) {
 248             cout << ", ";
 249             emit_json_string(keys[j]);
 250             cout << ": null";
 251         }
 252         cout << '}';
 253     }
 254 
 255     if (i > 0) {
 256         cout << endl;
 257         cout << ']' << endl;
 258     } else {
 259         cout << "[]" << endl;
 260     }
 261     return true;
 262 }
 263 
 264 bool handle_file(const char* path, string& line) {
 265     ifstream f(path);
 266     if (!f.is_open()) {
 267         const auto msg = "can't open file named";
 268         cerr << ERROR_STYLE << msg << " '" << path << "'" << RESET_STYLE << endl;
 269         return false;
 270     }
 271 
 272     return handle_input(f, line);
 273 }
 274 
 275 int main(int argc, char** argv) {
 276     string line;
 277 
 278     if (argc > 1) {
 279         if (
 280             strcmp(argv[1], "-h") == 0 ||
 281             strcmp(argv[1], "-help") == 0 ||
 282             strcmp(argv[1], "--h") == 0 ||
 283             strcmp(argv[1], "--help") == 0
 284         ) {
 285             cout << info;
 286             return 0;
 287         }
 288     }
 289 
 290     if (argc > 2) {
 291         cerr << ERROR_STYLE << "can't use more than 1 input file" << RESET_STYLE << endl;
 292         return 1;
 293     }
 294 
 295     cin.tie(NULL);
 296     ios_base::sync_with_stdio(false);
 297 
 298     if (argc < 2 || strcmp(argv[1], "-") == 0) {
 299         return handle_input(cin, line) ? 0 : 1;
 300     }
 301 
 302     return handle_file(argv[1], line) ? 0 : 1;
 303 }