/*
The MIT License (MIT)

Copyright © 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath detsv.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
	"strings"
	"unicode"
)

const info = `
detsv [options...] [filepath...]


This cmd-line app turns TSV (tab-separated values) data into general either
JSON (JavaScript Object Notation), JSONS (JSON Strings), or even JSONL (JSON
Lines).

When not given a filepath, the input is read from the standard input.

Options, when given, can either start with a single or a double-dash:

    -h          show this help message
    -help       show this help message

    -json       emit JSON, where numbers are auto-detected
    -jsonl      emit JSON Lines, where numbers are auto-detected
    -jsons      emit JSON Strings, where object values are strings or null
`

// noMoreOutput is a custom error-type meant to be deliberately ignored
type noMoreOutput struct{}

func (nmo noMoreOutput) Error() string {
	return `no more output`
}

// handler is the type all TSV-converter funcs adhere to
type handler func(*bufio.Writer, *bufio.Scanner) error

// handlers maps each output-format option, in both its single-dash and
// double-dash spellings, to the func which emits that format
var handlers = map[string]handler{
	`-json`:   emitJSON,
	`--json`:  emitJSON,
	`-jsonl`:  emitJSONL,
	`--jsonl`: emitJSONL,
	`-jsons`:  emitJSONS,
	`--jsons`: emitJSONS,
}

// main handles the cmd-line args, picks the output format (plain JSON when
// no format option is given), and converts either the single file named, or
// the standard input when no filepath is given
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	args := os.Args[1:]
	emit := emitJSON
	if len(args) > 0 {
		// only a leading arg is ever checked as a format option
		if v, ok := handlers[args[0]]; ok {
			emit = v
			args = args[1:]
		}
	}

	// at most one input is supported
	if len(args) > 1 {
		os.Stdout.WriteString(info[1:])
		os.Exit(1)
	}

	path := `-`
	if len(args) > 0 {
		path = args[0]
	}

	err := handleInput(os.Stdout, os.Stdin, path, emit)
	if _, ok := err.(noMoreOutput); ok {
		// writing the output failed, so quit quietly and successfully
		return
	}

	if err != nil {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// handleInput runs the handler given on the input chosen, buffering all
// output to w: the path `-` means read from r, while any other path names
// the file to open and read instead; the result is either the handler's
// own error, or an error about being unable to open the file named
func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
	bw := bufio.NewWriter(w)
	// errors from this final flush aren't reported
	defer bw.Flush()

	if path == `-` {
		return handle(bw, makeRowReader(r))
	}

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()

	return handle(bw, makeRowReader(f))
}

// cloneRow copies all strings from the row given, so the results no longer
// share memory with the input line they were sliced from
func cloneRow(row []string) []string {
	res := make([]string, 0, len(row))
	for _, s := range row {
		res = append(res, strings.Clone(s))
	}
	return res
}

// writeNulls emits null values for all keys starting at index `from`: each
// key is preceded by the separator given, unless it's the first item in its
// object, and is followed by the tail given, which has the closing quote
// for the key, the colon, and the null
func writeNulls(w *bufio.Writer, keys []string, from int, sep, tail string) {
	for i := from; i < len(keys); i++ {
		if i > 0 {
			w.WriteString(sep)
		}
		w.WriteByte('"')
		writeInnerStringJSON(w, keys[i])
		w.WriteString(tail)
	}
}

// emitJSON turns TSV lines into a single-line JSON array of objects, using
// the first row for the keys: number-like values are emitted as numbers,
// other values as strings, and values missing from short rows as nulls;
// nothing is emitted when there are no data rows
func emitJSON(w *bufio.Writer, rr *bufio.Scanner) error {
	got := 0
	var keys []string

	err := loopTSV(rr, func(i int, row []string) error {
		got = i
		if i == 0 {
			// the row slice is reused across calls, so keys must be copied
			keys = cloneRow(row)
			return nil
		}

		if i == 1 {
			w.WriteByte('[')
		} else {
			// errors from buffered writers are sticky, so checking once per
			// row is enough to detect output which can no longer be written
			err := w.WriteByte(',')
			if err != nil {
				return noMoreOutput{}
			}
		}

		w.WriteByte('{')
		for j, s := range row {
			if j > 0 {
				w.WriteByte(',')
			}

			if numberLike(s) {
				w.WriteByte('"')
				writeInnerStringJSON(w, keys[j])
				w.WriteString(`":`)
				w.WriteString(s)
				continue
			}

			writeKV(w, keys[j], s)
		}

		writeNulls(w, keys, len(row), `,`, `":null`)
		w.WriteByte('}')

		return nil
	})

	if err != nil {
		return err
	}

	if got > 0 {
		w.WriteString("]\n")
	}
	return nil
}

// emitJSONL turns TSV lines into JSON Lines, one object per data row, using
// the first row for the keys: number-like values are emitted as numbers,
// other values as strings, and values missing from short rows as nulls;
// the output is flushed after each line
func emitJSONL(w *bufio.Writer, rr *bufio.Scanner) error {
	var keys []string

	return loopTSV(rr, func(i int, row []string) error {
		if i == 0 {
			// the row slice is reused across calls, so keys must be copied
			keys = cloneRow(row)
			return nil
		}

		w.WriteByte('{')
		for j, s := range row {
			if j > 0 {
				w.WriteByte(',')
				w.WriteByte(' ')
			}

			if numberLike(s) {
				w.WriteByte('"')
				writeInnerStringJSON(w, keys[j])
				w.WriteString(`": `)
				w.WriteString(s)
				continue
			}

			writeKV(w, keys[j], s)
		}

		writeNulls(w, keys, len(row), `, `, `": null`)
		w.WriteByte('}')

		w.WriteByte('\n')
		if err := w.Flush(); err != nil {
			return noMoreOutput{}
		}
		return nil
	})
}

// emitJSONS turns TSV lines into a single-line JSON array of objects, using
// the first row for the keys: all values are emitted as strings, except for
// values missing from short rows, which are emitted as nulls; nothing is
// emitted when there are no data rows
func emitJSONS(w *bufio.Writer, rr *bufio.Scanner) error {
	got := 0
	var keys []string

	err := loopTSV(rr, func(i int, row []string) error {
		got = i

		if i == 0 {
			// the row slice is reused across calls, so keys must be copied
			keys = cloneRow(row)
			return nil
		}

		if i == 1 {
			w.WriteByte('[')
		} else {
			// errors from buffered writers are sticky, so checking once per
			// row is enough to detect output which can no longer be written
			err := w.WriteByte(',')
			if err != nil {
				return noMoreOutput{}
			}
		}

		w.WriteByte('{')
		for j, s := range row {
			if j > 0 {
				w.WriteByte(',')
			}
			writeKV(w, keys[j], s)
		}

		writeNulls(w, keys, len(row), `,`, `":null`)
		w.WriteByte('}')

		return nil
	})

	if err != nil {
		return err
	}

	if got > 0 {
		w.WriteString("]\n")
	}
	return nil
}

// writeInnerStringJSON helps JSON-encode strings more quickly: it emits the
// string's content without the surrounding double-quotes, which are up to
// the caller
func writeInnerStringJSON(w *bufio.Writer, s string) {
	needsEscaping := false
	for _, r := range s {
		// ASCII symbols from `#` to `~` are safe as they are, except for
		// backslashes
		if '#' <= r && r <= '~' && r != '\\' {
			continue
		}
		if r == ' ' || r == '!' || unicode.IsLetter(r) {
			continue
		}

		needsEscaping = true
		break
	}

	if !needsEscaping {
		w.WriteString(s)
		return
	}

	outer, err := json.Marshal(s)
	if err != nil {
		return
	}
	// ignore the double-quotes surrounding the encoded result
	inner := outer[1 : len(outer)-1]
	w.Write(inner)
}

// writeKV emits a JSON key-value pair, where the value is always a string
func writeKV(w *bufio.Writer, k string, s string) {
	w.WriteByte('"')
	writeInnerStringJSON(w, k)
	w.WriteString(`": "`)
	writeInnerStringJSON(w, s)
	w.WriteByte('"')
}

// numberLike checks if a string can be emitted verbatim as a JSON number:
// it allows an optional leading minus, followed by digits, optionally
// followed by a dot and more digits; exponents aren't supported
func numberLike(s string) bool {
	if len(s) == 0 {
		return false
	}

	if s[0] == '-' {
		s = s[1:]
	}

	if len(s) == 0 || s[0] < '0' || s[0] > '9' {
		return false
	}

	// JSON numbers can't have extra leading zeros, so values like `007`
	// must be emitted as strings to keep the output valid
	if s[0] == '0' && len(s) > 1 && s[1] != '.' {
		return false
	}

	for len(s) > 0 {
		lead := s[0]
		s = s[1:]

		if lead == '.' {
			return allDigits(s)
		}
		if lead < '0' || lead > '9' {
			return false
		}
	}

	return true
}

// allDigits checks if a non-empty string only has ASCII decimal digits
func allDigits(s string) bool {
	if len(s) == 0 {
		return false
	}

	for _, r := range s {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}

// makeRowReader makes a line-scanner which allows lines up to 8 GiB long
func makeRowReader(r io.Reader) *bufio.Scanner {
	rr := bufio.NewScanner(r)
	rr.Buffer(nil, 8*1024*1024*1024)
	return rr
}

// loopTSV calls the func given for each non-empty line, along with its
// 0-based index, which doesn't count empty lines: the first such line is
// the header, and sets the max number of items later rows can have; the
// row slice is reused across calls, so callbacks must copy what they keep;
// looping stops at the first error, which includes errors from the callback
// and errors from reading the input
func loopTSV(rr *bufio.Scanner, handle func(i int, row []string) error) error {
	i := 0
	width := 0
	var row []string

	for rr.Scan() {
		line := rr.Text()
		if len(line) == 0 {
			continue
		}

		row = appendTSV(row[:0], line)
		if i == 0 {
			// the header can only be measured after it's been split
			width = len(row)
		}

		if len(row) > width {
			return errors.New(`data-row has more items than the header`)
		}

		if err := handle(i, row); err != nil {
			return err
		}
		i++
	}

	// report input errors, instead of treating them as the end of the data
	return rr.Err()
}

// appendTSV splits a line on its tabs, appending all items found to the
// slice given, and returns the updated slice: the items share memory with
// the line; an empty line appends nothing, and a tab at the very end of a
// line isn't followed by an empty item
func appendTSV(dst []string, row string) []string {
	for len(row) > 0 {
		i := strings.IndexByte(row, '\t')
		if i < 0 {
			return append(dst, row)
		}

		dst = append(dst, row[:i])
		row = row[i+1:]
	}

	return dst
}