// File: detsv.go

/*
The MIT License (MIT)

Copyright © 2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

    go build -ldflags "-s -w" -trimpath detsv.go
*/

// Command detsv converts TSV (tab-separated values) input, whose first
// non-empty line is the header, into JSON, JSON Lines, or JSON Strings.
package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
	"strings"
	"unicode"
)

// info is the help message: its leading line-feed is skipped when shown
const info = `
detsv [options...] [filepath...]


This cmd-line app turns TSV (tab-separated values) data into general either
JSON (JavaScript Object Notation), JSONS (JSON Strings), or even JSONL (JSON
Lines).

When not given a filepath, the input is read from the standard input.

Options, when given, can either start with a single or a double-dash:

    -h          show this help message
    -help       show this help message

    -json       emit JSON, where numbers are auto-detected
    -jsonl      emit JSON Lines, where numbers are auto-detected
    -jsons      emit JSON Strings, where object values are strings or null
`

// noMoreOutput is a custom error-type meant to be deliberately ignored: the
// emitters return it when a checked write to the output fails
type noMoreOutput struct{}

func (nmo noMoreOutput) Error() string {
	return `no more output`
}

// handler is the type all TSV-converter funcs adhere to
type handler func(*bufio.Writer, *bufio.Scanner) error

// handlers maps each output-format option to its converter func
var handlers = map[string]handler{
	`-json`:   emitJSON,
	`--json`:  emitJSON,
	`-jsonl`:  emitJSONL,
	`--jsonl`: emitJSONL,
	`-jsons`:  emitJSONS,
	`--jsons`: emitJSONS,
}

// main handles the cmd-line args: an optional leading format option, then at
// most one filepath; it exits with code 1 on bad usage and on errors
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	args := os.Args[1:]
	emit := emitJSON
	if len(args) > 0 {
		if v, ok := handlers[args[0]]; ok {
			emit = v
			args = args[1:]
		}
	}

	if len(args) > 1 {
		os.Stdout.WriteString(info[1:])
		os.Exit(1)
	}

	path := `-`
	if len(args) > 0 {
		path = args[0]
	}

	err := handleInput(os.Stdout, os.Stdin, path, emit)

	// failing to write output isn't reported as an error: quit quietly
	var stop noMoreOutput
	if errors.As(err, &stop) {
		return
	}

	if err != nil {
		os.Stderr.WriteString("\x1b[31m")
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\x1b[0m\n")
		os.Exit(1)
	}
}

// handleInput runs the converter func given, reading either from r, when the
// path given is a single dash, or from the file named by path otherwise; all
// output goes to w through a buffer, which is flushed before returning
func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	if path == `-` {
		return handle(bw, makeRowReader(r))
	}

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()

	return handle(bw, makeRowReader(f))
}

// emitJSON turns TSV rows into a single-line JSON array of objects, where
// number-like values are emitted as numbers
func emitJSON(w *bufio.Writer, rr *bufio.Scanner) error {
	return emitArray(w, rr, true)
}

// emitJSONL turns each TSV data-row into its own line of JSON, where
// number-like values are emitted as numbers; output is flushed after each
// line, so a failed write is noticed right away
func emitJSONL(w *bufio.Writer, rr *bufio.Scanner) error {
	var keys []string

	return loopTSV(rr, func(i int, row []string) error {
		if i == 0 {
			keys = copyHeader(row)
			return nil
		}

		writeObject(w, keys, row, `, `, `": `, true)
		w.WriteByte('\n')
		if err := w.Flush(); err != nil {
			return noMoreOutput{}
		}
		return nil
	})
}

// emitJSONS turns TSV rows into a single-line JSON array of objects, whose
// values are always either strings or null
func emitJSONS(w *bufio.Writer, rr *bufio.Scanner) error {
	return emitArray(w, rr, false)
}

// emitArray is the logic shared by emitJSON and emitJSONS, which only differ
// on whether number-like values are emitted as numbers; nothing is emitted
// when the input has no data-rows
func emitArray(w *bufio.Writer, rr *bufio.Scanner, numbers bool) error {
	rows := 0
	var keys []string

	err := loopTSV(rr, func(i int, row []string) error {
		if i == 0 {
			keys = copyHeader(row)
			return nil
		}
		rows = i

		if i == 1 {
			w.WriteByte('[')
		} else if err := w.WriteByte(','); err != nil {
			return noMoreOutput{}
		}

		writeObject(w, keys, row, `,`, `":`, numbers)
		return nil
	})

	if err != nil {
		return err
	}

	if rows > 0 {
		w.WriteString("]\n")
	}
	return nil
}

// copyHeader makes an independent copy of the header row, since the row
// slice handed to loopTSV callbacks is reused across lines
func copyHeader(row []string) []string {
	keys := make([]string, 0, len(row))
	for _, s := range row {
		keys = append(keys, strings.Clone(s))
	}
	return keys
}

// writeObject emits a row as a JSON object: sep goes between key-value pairs,
// colon goes right after the key's text for numbers and nulls, and keys
// missing from the row get null values; rows can't have more items than keys
func writeObject(w *bufio.Writer, keys, row []string, sep, colon string, numbers bool) {
	w.WriteByte('{')

	for i, s := range row {
		if i > 0 {
			w.WriteString(sep)
		}

		if numbers && numberLike(s) {
			w.WriteByte('"')
			writeInnerStringJSON(w, keys[i])
			w.WriteString(colon)
			w.WriteString(s)
			continue
		}

		writeKV(w, keys[i], s)
	}

	for i := len(row); i < len(keys); i++ {
		if i > 0 {
			w.WriteString(sep)
		}
		w.WriteByte('"')
		writeInnerStringJSON(w, keys[i])
		w.WriteString(colon)
		w.WriteString(`null`)
	}

	w.WriteByte('}')
}

// writeInnerStringJSON helps JSON-encode strings more quickly: it emits the
// string's content without the surrounding double-quotes
func writeInnerStringJSON(w *bufio.Writer, s string) {
	needsEscaping := false
	for _, r := range s {
		// printable ASCII past the double-quote, except for backslashes
		if '#' <= r && r <= '~' && r != '\\' {
			continue
		}
		if r == ' ' || r == '!' || unicode.IsLetter(r) {
			continue
		}

		needsEscaping = true
		break
	}

	if !needsEscaping {
		w.WriteString(s)
		return
	}

	outer, err := json.Marshal(s)
	if err != nil {
		return
	}

	// drop the double-quotes surrounding the encoded string
	inner := outer[1 : len(outer)-1]
	w.Write(inner)
}

// writeKV emits a JSON key-value pair whose value is a string
func writeKV(w *bufio.Writer, k string, s string) {
	w.WriteByte('"')
	writeInnerStringJSON(w, k)
	w.WriteString(`": "`)
	writeInnerStringJSON(w, s)
	w.WriteByte('"')
}

// numberLike checks if a string can be emitted verbatim as a JSON number: an
// optional leading minus, digits, and an optional dot followed by digits;
// exponents aren't supported, and integer parts with extra leading zeros are
// rejected, since those aren't valid JSON
func numberLike(s string) bool {
	if len(s) == 0 {
		return false
	}

	if s[0] == '-' {
		s = s[1:]
	}

	if len(s) == 0 || s[0] < '0' || s[0] > '9' {
		return false
	}

	// a leading zero can only be followed by the decimal dot
	if len(s) > 1 && s[0] == '0' && s[1] != '.' {
		return false
	}

	for len(s) > 0 {
		lead := s[0]
		s = s[1:]

		if lead == '.' {
			return allDigits(s)
		}
		if lead < '0' || lead > '9' {
			return false
		}
	}

	return true
}

// allDigits checks if a non-empty string only has ASCII decimal digits
func allDigits(s string) bool {
	if len(s) == 0 {
		return false
	}

	for _, r := range s {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}

// makeRowReader makes a line-scanner which allows lines up to 8 GiB long, or
// up to the max int value on platforms where ints can't go that high
func makeRowReader(r io.Reader) *bufio.Scanner {
	const maxInt = int(^uint(0) >> 1)

	limit := int64(8) << 30
	if limit > int64(maxInt) {
		limit = int64(maxInt)
	}

	rr := bufio.NewScanner(r)
	rr.Buffer(nil, int(limit))
	return rr
}

// loopTSV calls the func given for each non-empty line, along with a 0-based
// counter of the non-empty lines so far: the first such line is the header,
// and data-rows with more items than the header cause an error. The row
// slice given to the callback is reused, so callbacks must copy it to keep
// it. Any error from the line-scanner is returned after the loop is over.
func loopTSV(rr *bufio.Scanner, handle func(i int, row []string) error) error {
	i := 0
	width := 0
	var row []string

	for rr.Scan() {
		line := rr.Text()
		if len(line) == 0 {
			continue
		}

		row = appendTSV(row[:0], line)

		// the header row must be split before its width can be known
		if i == 0 {
			width = len(row)
		}

		if len(row) > width {
			return errors.New(`data-row has more items than the header`)
		}

		if err := handle(i, row); err != nil {
			return err
		}
		i++
	}

	return rr.Err()
}

// appendTSV appends all tab-separated items from the line given to the slice
// given; a trailing tab doesn't result in a trailing empty item
func appendTSV(dst []string, row string) []string {
	for len(row) > 0 {
		i := strings.IndexByte(row, '\t')
		if i < 0 {
			return append(dst, row)
		}

		dst = append(dst, row[:i])
		row = row[i+1:]
	}

	return dst
}