// File: detsv.cpp
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

    g++ -Wall -O2 -s -o detsv detsv.cpp
*/

#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

// ERROR_STYLE/RESET_STYLE wrap error messages in ANSI color codes when the
// app is built with RED_ERRORS defined; otherwise both expand to nothing.
// Each macro is defined exactly once per configuration: apple builds get the
// basic red code, all others the 24-bit color one.
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE ""
#define RESET_STYLE ""
#endif

using namespace std;

// info is the help message shown by the help options
const string info = ""
    "detsv [file...]\n"
    "\n"
    "Turn TSV tables into JSON data.\n"
    "";

// de_bom removes a leading UTF-8 byte-order mark from the string given, if
// there is one
void de_bom(string &s) {
    // s.starts_with("\xef\xbb\xbf")
    if (s.compare(0, 3, "\xef\xbb\xbf") == 0) {
        s.erase(0, 3);
    }
}

// de_cr removes all carriage-returns from the string given, not just a
// trailing one
void de_cr(string &s) {
    s.erase(remove(s.begin(), s.end(), '\r'), s.end());
}

// for_each_tab_field calls `emit` once for each tab-separated field in the
// line given, in order; every field followed by a tab is emitted, even when
// empty, but an empty field after the last tab is not: this means an empty
// line has no fields, and a line ending with a tab has as many fields as it
// has tabs
template <typename Emit>
static void for_each_tab_field(string_view line, Emit emit) {
    size_t start = 0;

    for (;;) {
        const size_t tab = line.find('\t', start);
        if (tab == string_view::npos) {
            break;
        }
        emit(line.substr(start, tab - start));
        start = tab + 1;
    }

    if (start < line.size()) {
        emit(line.substr(start));
    }
}

// tab_split appends copies of the tab-separated fields in the line given to
// `items`, which isn't cleared first; a trailing empty field isn't appended
void tab_split(const string& line, vector<string>& items) {
    for_each_tab_field(line, [&items](string_view field) {
        items.emplace_back(field);
    });
}

// tab_split_view is like tab_split, except the items appended are views into
// `line`, which are only valid until `line` is changed or destroyed
void tab_split_view(const string& line, vector<string_view>& items) {
    for_each_tab_field(line, [&items](string_view field) {
        items.push_back(field);
    });
}

// emit_json_string_view writes the text given to stdout as a double-quoted
// JSON string: double-quotes and backslashes are backslash-escaped, and
// control bytes below 0x20 are emitted as \u00XX escapes, since JSON doesn't
// allow those unescaped inside strings; all other bytes are copied as given
void emit_json_string_view(const string_view& s) {
    static const char hex[] = "0123456789abcdef";

    cout << '"';
    for (const char c : s) {
        const auto b = static_cast<unsigned char>(c);

        if (c == '"' || c == '\\') {
            cout << '\\' << c;
        } else if (b < 0x20) {
            cout << "\\u00" << hex[b >> 4] << hex[b & 0x0f];
        } else {
            cout << c;
        }
    }
    cout << '"';
}

// emit_json_string writes the string given to stdout as a double-quoted JSON
// string, escaping it the same way emit_json_string_view does
void emit_json_string(const string& s) {
    emit_json_string_view(s);
}

// seems_json_number detects only a subset of valid json numbers for now:
// an optional leading minus, then an integer part with no needless leading
// zeros, optionally followed by a dot and 1 or more decimal digits; numbers
// using exponents aren't detected
bool seems_json_number(const string_view& s) {
    const size_t n = s.size();
    const auto is_digit = [](char c) { return '0' <= c && c <= '9'; };
    size_t i = 0;

    if (i < n && s[i] == '-') {
        i++;
    }

    const size_t int_start = i;
    while (i < n && is_digit(s[i])) {
        i++;
    }
    const size_t int_digits = i - int_start;

    if (int_digits == 0) {
        return false;
    }
    // JSON only allows a leading zero when it's the whole integer part
    if (int_digits > 1 && s[int_start] == '0') {
        return false;
    }

    if (i == n) {
        return true;
    }
    if (s[i] != '.') {
        return false;
    }
    i++;

    // demand digits after a dot, and nothing else after those
    const size_t frac_start = i;
    while (i < n && is_digit(s[i])) {
        i++;
    }
    return i == n && i > frac_start;
}

// emit_json_value writes a TSV value to stdout as a JSON value: for now all
// values are emitted as JSON strings, since the special cases below are
// disabled
void emit_json_value(const string_view& s) {
    // if (s == "") {
    //     cout << "null";
    //     return;
    // }

    // if (s == "null") {
    //     cout << s;
    //     return;
    // }

    // if (s == "true" || s == "false") {
    //     cout << s;
    //     return;
    // }

    // recognize numbers to avoid quoting them
    // if (seems_json_number(s)) {
    //     cout << s;
    //     return;
    // }

    emit_json_string_view(s);
}

// handle_input reads TSV lines from the input given, and writes them to
// stdout as a JSON array of objects, using the items from the first line as
// the keys for all objects; `line` is a scratch buffer the caller provides,
// which is overwritten.
//
// Rows with fewer items than keys have their missing values set to null,
// while a row with more items than keys is an error, which is reported on
// stderr. A completely empty input results in no output at all, while a
// header line with no data lines after it results in an empty array.
//
// The result is false when a row has too many items, or when writing to
// stdout failed; it's true otherwise.
bool handle_input(istream& in, string& line) {
    if (!getline(in, line)) {
        return true;
    }
    de_bom(line);
    de_cr(line);

    vector<string> keys;
    keys.reserve(count(line.begin(), line.end(), '\t') + 1);
    tab_split(line, keys);
    const size_t n = keys.size();

    vector<string_view> values;
    values.reserve(n);

    size_t rows = 0;
    // stop reading early when stdout can no longer be written to
    for (; cout && getline(in, line); rows++) {
        de_cr(line);

        // rows are ended with plain line-feeds, instead of endl, to avoid
        // flushing stdout after each row
        cout << (rows == 0 ? "[\n" : ",\n");

        // these views point into `line`, so they're rebuilt for each row
        values.clear();
        tab_split_view(line, values);
        const size_t got = values.size();

        if (got > n) {
            cerr << ERROR_STYLE "expected up to " << n << " items, but got ";
            cerr << got << " instead" << RESET_STYLE << '\n';
            return false;
        }

        cout << " {";
        for (size_t j = 0; j < n; j++) {
            // only separate key-value pairs: a leading comma, even when a
            // row has no items at all, would make the output invalid JSON
            if (j > 0) {
                cout << ", ";
            }
            emit_json_string(keys[j]);
            cout << ": ";
            if (j < got) {
                emit_json_value(values[j]);
            } else {
                cout << "null";
            }
        }
        cout << '}';
    }

    cout << (rows > 0 ? "\n]\n" : "[]\n");

    // flush explicitly, so output errors can be detected and reported
    cout.flush();
    return !cout.fail();
}

// handle_file runs handle_input on the file whose path is given, reporting
// on stderr and returning false when the file can't be opened
bool handle_file(const char* path, string& line) {
    ifstream f(path);
    if (!f.is_open()) {
        const auto msg = "can't open file named";
        cerr << ERROR_STYLE << msg << " '" << path << "'" << RESET_STYLE << '\n';
        return false;
    }

    return handle_input(f, line);
}

// main handles the help options, then converts either the single file named
// or stdin, which is used when no name is given or when the name is a single
// dash; the exit code is 0 on success and 1 on failure
int main(int argc, char** argv) {
    if (argc > 1) {
        const char* const arg = argv[1];
        const bool wants_help =
            strcmp(arg, "-h") == 0 ||
            strcmp(arg, "-help") == 0 ||
            strcmp(arg, "--h") == 0 ||
            strcmp(arg, "--help") == 0;

        if (wants_help) {
            cout << info;
            return 0;
        }
    }

    if (argc > 2) {
        cerr << ERROR_STYLE << "can't use more than 1 input file" << RESET_STYLE << '\n';
        return 1;
    }

    cin.tie(nullptr);
    ios_base::sync_with_stdio(false);

    string line;
    const bool use_stdin = argc < 2 || strcmp(argv[1], "-") == 0;
    const bool ok = use_stdin ? handle_input(cin, line) : handle_file(argv[1], line);
    return ok ? 0 : 1;
}