File: detsv.cpp
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 g++ -Wall -O2 -s -o detsv detsv.cpp
  29 */
  30 
  31 #include <algorithm>
  32 #include <cstring>
  33 #include <fstream>
  34 #include <iostream>
  35 #include <string>
  36 #include <vector>
  37 
  38 using namespace std;
  39 
  40 const string info = ""
  41 "detsv [file...]\n"
  42 "\n"
  43 "Turn TSV tables into JSON data.\n"
  44 "";
  45 
  46 void de_bom(string &s) {
  47     // s.starts_with("\xef\xbb\xbf")
  48     if (s.size() >= 3 && s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') {
  49         s.erase(0, 3);
  50     }
  51 }
  52 
  53 void de_cr(string &s) {
  54     s.erase(remove(s.begin(), s.end(), '\r'), s.end());
  55 }
  56 
  57 void tab_split(const string& line, vector<string>& items) {
  58     size_t start = 0;
  59     size_t end = line.size();
  60 
  61     for (size_t i = 0; i < end; i++) {
  62         if (line[i] == '\t') {
  63             items.push_back(line.substr(start, i - start));
  64             start = i + 1;
  65         }
  66     }
  67 
  68     if (start < end) {
  69         items.push_back(line.substr(start, end - start + 1));
  70     }
  71 }
  72 
  73 void tab_split_view(const string& line, vector<string_view>& items) {
  74     size_t start = 0;
  75     size_t end = line.size();
  76 
  77     for (size_t i = 0; i < end; i++) {
  78         if (line[i] == '\t') {
  79             items.push_back(string_view(line).substr(start, i - start));
  80             start = i + 1;
  81         }
  82     }
  83 
  84     if (start < end) {
  85         items.push_back(string_view(line).substr(start, end - start + 1));
  86     }
  87 }
  88 
  89 void emit_json_string(const string& s) {
  90     cout << '"';
  91     for (auto c : s) {
  92         switch (c) {
  93             case '"':
  94             case '\\':
  95                 cout << '\\' << c;
  96                 break;
  97 
  98             default:
  99                 cout << c;
 100                 break;
 101         }
 102     }
 103     cout << '"';
 104 }
 105 
 106 void emit_json_string_view(const string_view& s) {
 107     cout << '"';
 108     for (auto c : s) {
 109         switch (c) {
 110             case '"':
 111             case '\\':
 112                 cout << '\\' << c;
 113                 break;
 114 
 115             default:
 116                 cout << c;
 117                 break;
 118         }
 119     }
 120     cout << '"';
 121 }
 122 
 123 // seems_json_number detects only a subset of valid json numbers for now
 124 bool seems_json_number(const string_view& s) {
 125     size_t dots = 0;
 126     size_t digits = 0;
 127 
 128     for (auto c : s) {
 129         if ('0' <= c && c <= '9') {
 130             digits++;
 131             continue;
 132         }
 133 
 134         if (c == '-' && digits > 0) {
 135             return false;
 136         }
 137 
 138         if (c == '.') {
 139             if (digits == 0 || dots > 0) {
 140                 return false;
 141             }
 142 
 143             dots++;
 144             digits = 0; // effectively demand digits after a dot
 145             continue;
 146         }
 147 
 148         return false;
 149     }
 150 
 151     return digits > 0;
 152 }
 153 
// emit_json_value writes a single table item to stdout as a JSON value: all
// the type-detecting checks below are commented out, so for now every item,
// even an empty or a number-like one, is emitted as a quoted JSON string
void emit_json_value(const string_view& s) {
    // if (s == "") {
    //     cout << "null";
    //     return;
    // }

    // if (s == "null") {
    //     cout << s;
    //     return;
    // }

    // if (s == "true" || s == "false") {
    //     cout << s;
    //     return;
    // }

    // recognize numbers to avoid quoting them
    // if (seems_json_number(s)) {
    //     cout << s;
    //     return;
    // }

    emit_json_string_view(s);
}
 178 
 179 // size_t count(string& s, char what) {
 180 //     size_t count = 0;
 181 //     for (auto c : s) {
 182 //         if (c == what) {
 183 //             count++;
 184 //         }
 185 //     }
 186 //     return count;
 187 // }
 188 
 189 bool handle_input(istream& in, string& line) {
 190     if (!getline(in, line)) {
 191         return true;
 192     }
 193     de_bom(line);
 194     de_cr(line);
 195 
 196     vector<string> keys;
 197     keys.reserve(count_if(line.begin(), line.end(), [](char c) {
 198         return c == '\t';
 199     }) + 1);
 200     // keys.reserve(count(line, '\t') + 1);
 201     tab_split(line, keys);
 202     size_t n = keys.size();
 203 
 204     size_t i = 0;
 205     vector<string_view> values;
 206     values.reserve(n);
 207 
 208     for (i = 0; !cout.eof() && getline(in, line); i++) {
 209         de_cr(line);
 210 
 211         if (i == 0) {
 212             cout << '[' << endl;
 213         } else {
 214             cout << ',' << endl;
 215         }
 216 
 217         values.clear();
 218         tab_split_view(line, values);
 219         size_t got = values.size();
 220 
 221         if (got > n) {
 222             cerr << "\x1b[31mexpected up to " << n << " items, but got ";
 223             cerr << got << " instead\x1b[0m" << endl;
 224             return false;
 225         }
 226 
 227         cout << "  {";
 228         for (size_t j = 0; j < got; j++) {
 229             if (j > 0) {
 230                 cout << ", ";
 231             }
 232             emit_json_string(keys[j]);
 233             cout << ": ";
 234             emit_json_value(values[j]);
 235         }
 236         for (size_t j = got; j < n; j++) {
 237             cout << ", ";
 238             emit_json_string(keys[j]);
 239             cout << ": null";
 240         }
 241         cout << '}';
 242     }
 243 
 244     if (i > 0) {
 245         cout << endl;
 246         cout << ']' << endl;
 247     } else {
 248         cout << "[]" << endl;
 249     }
 250     return true;
 251 }
 252 
 253 bool handle_file(const char* path, string& line) {
 254     ifstream f(path);
 255     if (!f.is_open()) {
 256         const auto msg = "can't open file named";
 257         cerr << "\x1b[31m" << msg << " '" << path << "'\x1b[0m" << endl;
 258         return false;
 259     }
 260 
 261     return handle_input(f, line);
 262 }
 263 
 264 int main(int argc, char** argv) {
 265     string line;
 266 
 267     if (argc > 1) {
 268         if (
 269             strcmp(argv[1], "-h") == 0 ||
 270             strcmp(argv[1], "-help") == 0 ||
 271             strcmp(argv[1], "--h") == 0 ||
 272             strcmp(argv[1], "--help") == 0
 273         ) {
 274             cout << info;
 275             return 0;
 276         }
 277     }
 278 
 279     if (argc > 2) {
 280         cerr << "\x1b[31mcan't use more than 1 input file\x1b[0m" << endl;
 281         return 1;
 282     }
 283 
 284     cin.tie(NULL);
 285     ios_base::sync_with_stdio(false);
 286 
 287     if (argc < 2 || strcmp(argv[1], "-") == 0) {
 288         return handle_input(cin, line) ? 0 : 1;
 289     }
 290 
 291     return handle_file(argv[1], line) ? 0 : 1;
 292 }