// File: detsv.cpp
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

g++ -std=c++17 -Wall -O2 -s -o detsv detsv.cpp

The code uses std::string_view, so it needs C++17 or later. Add -DRED_ERRORS
to have error messages styled in red.
*/

#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

// ANSI styling for error messages; both macros expand to empty strings unless
// RED_ERRORS is defined. Apple builds get the basic red escape instead of the
// 24-bit color one. Each macro is defined exactly once per configuration.
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE ""
#define RESET_STYLE ""
#endif

using namespace std;

// info is the help message shown by the help options
const string info = ""
"detsv [file...]\n"
"\n"
"Turn TSV tables into JSON data.\n"
"";

// de_bom removes a leading UTF-8 byte-order mark from s, if there is one
void de_bom(string &s) {
    // s.starts_with("\xef\xbb\xbf")
    if (s.size() >= 3 && s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') {
        s.erase(0, 3);
    }
}

// de_cr removes every carriage-return in s, not just a trailing one
void de_cr(string &s) {
    s.erase(remove(s.begin(), s.end(), '\r'), s.end());
}

// tab_split_into appends the tab-separated fields of line to items; it is the
// shared implementation behind tab_split and tab_split_view.
//
// Empty fields between tabs are kept, but an empty field after the last tab
// is dropped, and an empty line yields no fields at all: handle_input turns
// such missing trailing fields into nulls.
template <typename Item>
void tab_split_into(string_view line, vector<Item>& items) {
    size_t start = 0;
    const size_t end = line.size();

    for (size_t i = 0; i < end; i++) {
        if (line[i] == '\t') {
            items.emplace_back(line.substr(start, i - start));
            start = i + 1;
        }
    }

    if (start < end) {
        items.emplace_back(line.substr(start));
    }
}

// tab_split appends copies of the tab-separated fields of line to items
void tab_split(const string& line, vector<string>& items) {
    tab_split_into(string_view(line), items);
}

// tab_split_view appends views of the tab-separated fields of line to items;
// the views point into line, so they are only valid until line is changed
void tab_split_view(const string& line, vector<string_view>& items) {
    tab_split_into(string_view(line), items);
}

// emit_json_string_view writes s to stdout as a double-quoted JSON string.
//
// Double quotes and backslashes are backslash-escaped, and so are all bytes
// below 0x20, since JSON forbids raw control characters inside strings. All
// other bytes are written as they are.
void emit_json_string_view(const string_view& s) {
    static const char hex_digits[] = "0123456789abcdef";

    cout << '"';
    for (const char c : s) {
        switch (c) {
        case '"':
        case '\\':
            cout << '\\' << c;
            break;
        case '\b':
            cout << "\\b";
            break;
        case '\f':
            cout << "\\f";
            break;
        case '\n':
            cout << "\\n";
            break;
        case '\r':
            cout << "\\r";
            break;
        case '\t':
            cout << "\\t";
            break;
        default: {
            const auto u = static_cast<unsigned char>(c);
            if (u < 0x20) {
                // control characters without a short escape
                cout << "\\u00" << hex_digits[u >> 4] << hex_digits[u & 0x0f];
            } else {
                cout << c;
            }
            break;
        }
        }
    }
    cout << '"';
}

// emit_json_string writes s to stdout as a double-quoted JSON string, with
// the same escaping rules as emit_json_string_view
void emit_json_string(const string& s) {
    emit_json_string_view(string_view(s));
}

// seems_json_number detects only a subset of valid json numbers for now:
// an optional leading minus, an integer part without extra leading zeros,
// and an optional fraction with at least 1 digit; exponents aren't supported
bool seems_json_number(const string_view& s) {
    const auto is_digit = [](char c) { return '0' <= c && c <= '9'; };
    const size_t n = s.size();
    size_t i = 0;

    if (i < n && s[i] == '-') {
        i++;
    }

    // integer part: either a lone 0, or digits not starting with a 0
    if (i >= n || !is_digit(s[i])) {
        return false;
    }
    if (s[i] == '0') {
        i++;
    } else {
        while (i < n && is_digit(s[i])) {
            i++;
        }
    }

    // optional fraction: demand digits after a dot
    if (i < n && s[i] == '.') {
        i++;
        if (i >= n || !is_digit(s[i])) {
            return false;
        }
        while (i < n && is_digit(s[i])) {
            i++;
        }
    }

    // anything left over means it's not a number this func recognizes
    return i == n;
}

// emit_json_value writes the field value s to stdout: for now all values are
// emitted as JSON strings, as the alternatives below are disabled
void emit_json_value(const string_view& s) {
    // if (s == "") {
    //     cout << "null";
    //     return;
    // }

    // if (s == "null") {
    //     cout << s;
    //     return;
    // }

    // if (s == "true" || s == "false") {
    //     cout << s;
    //     return;
    // }

    // recognize numbers to avoid quoting them
    // if (seems_json_number(s)) {
    //     cout << s;
    //     return;
    // }

    emit_json_string_view(s);
}

// size_t count(string& s, char what) {
//     size_t count = 0;
//     for (auto c : s) {
//         if (c == what) {
//             count++;
//         }
//     }
//     return count;
// }

// handle_input turns the TSV table read from in into a JSON array of objects
// written to stdout, using line as a reusable buffer.
//
// The first line gives the keys: a leading UTF-8 BOM is ignored there, and
// carriage-returns are ignored on all lines. Each later line becomes 1 object;
// lines with fewer fields than keys, even empty lines, get nulls for their
// missing fields. Input without even a header line results in no output.
//
// The result is false only when a line has more fields than there are keys:
// in that case an error is shown on stderr and the output is left unfinished.
bool handle_input(istream& in, string& line) {
    if (!getline(in, line)) {
        return true;
    }
    de_bom(line);
    de_cr(line);

    vector<string> keys;
    const auto tabs = std::count(line.begin(), line.end(), '\t');
    keys.reserve(static_cast<size_t>(tabs) + 1);
    tab_split(line, keys);
    const size_t n = keys.size();

    // values holds views into line, so it's emptied before each line is split
    vector<string_view> values;
    values.reserve(n);

    size_t rows = 0;
    // quit early when stdout can no longer be written to
    for (; cout && getline(in, line); rows++) {
        de_cr(line);

        // newlines are written without flushing: flushing on each row is slow
        cout << (rows == 0 ? "[\n" : ",\n");

        values.clear();
        tab_split_view(line, values);
        const size_t got = values.size();

        if (got > n) {
            // show what was emitted so far before the error message
            cout.flush();
            cerr << ERROR_STYLE "expected up to " << n << " items, but got ";
            cerr << got << " instead" << RESET_STYLE << '\n';
            return false;
        }

        cout << " {";
        for (size_t j = 0; j < n; j++) {
            // separators depend on the key index, so rows missing all their
            // values still result in valid JSON
            if (j > 0) {
                cout << ", ";
            }
            emit_json_string(keys[j]);
            cout << ": ";
            if (j < got) {
                emit_json_value(values[j]);
            } else {
                cout << "null";
            }
        }
        cout << '}';
    }

    cout << (rows > 0 ? "\n]\n" : "[]\n");
    return true;
}

// handle_file runs handle_input on the file at the path given, using line as
// a reusable buffer; the result is false when the file can't be opened, or
// when handle_input fails
bool handle_file(const char* path, string& line) {
    ifstream f(path);
    if (!f.is_open()) {
        const auto msg = "can't open file named";
        cerr << ERROR_STYLE << msg << " '" << path << "'" << RESET_STYLE << '\n';
        return false;
    }

    return handle_input(f, line);
}

// Define DETSV_NO_MAIN to compile everything except the entry point, e.g. to
// call the functions above from a separate test driver.
#ifndef DETSV_NO_MAIN

// main handles the help options, then converts either the 1 file given, or
// standard input when no file is given or when the name given is a dash; the
// exit code is 0 on success and 1 on failure
int main(int argc, char** argv) {
    string line;

    if (argc > 1) {
        if (
            strcmp(argv[1], "-h") == 0 ||
            strcmp(argv[1], "-help") == 0 ||
            strcmp(argv[1], "--h") == 0 ||
            strcmp(argv[1], "--help") == 0
        ) {
            cout << info;
            return 0;
        }
    }

    if (argc > 2) {
        cerr << ERROR_STYLE << "can't use more than 1 input file" << RESET_STYLE << '\n';
        return 1;
    }

    cin.tie(nullptr);
    ios_base::sync_with_stdio(false);

    if (argc < 2 || strcmp(argv[1], "-") == 0) {
        return handle_input(cin, line) ? 0 : 1;
    }

    return handle_file(argv[1], line) ? 0 : 1;
}

#endif // DETSV_NO_MAIN