File: detsv.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 Single-file source-code for detsv. 27 28 To compile a smaller-sized command-line app, you can use the `go` command as 29 follows: 30 31 go build -ldflags "-s -w" -trimpath detsv.go 32 */ 33 34 package main 35 36 import ( 37 "bufio" 38 "encoding/json" 39 "errors" 40 "io" 41 "os" 42 "strings" 43 "unicode" 44 ) 45 46 const info = ` 47 detsv [options...] [filepath...] 48 49 50 This cmd-line app turns TSV (tab-separated values) data into general either 51 JSON (JavaScript Object Notation), JSONS (JSON Strings), or even JSONL (JSON 52 Lines). 53 54 When not given a filepath, the input is read from the standard input. 55 56 Options, when given, can either start with a single or a double-dash: 57 58 -h show this help message 59 -help show this help message 60 61 -json emit JSON, where numbers are auto-detected 62 -jsonl emit JSON Lines, where numbers are auto-detected 63 -jsons emit JSON Strings, where object values are strings or null 64 ` 65 66 // noMoreOutput is a custom error-type meant to be deliberately ignored 67 type noMoreOutput struct{} 68 69 func (nmo noMoreOutput) Error() string { 70 return `no more output` 71 } 72 73 // handler is the type all TSV-converter funcs adhere to 74 type handler func(*bufio.Writer, *bufio.Scanner) error 75 76 var handlers = map[string]handler{ 77 `-json`: emitJSON, 78 `--json`: emitJSON, 79 `-jsonl`: emitJSONL, 80 `--jsonl`: emitJSONL, 81 `-jsons`: emitJSONS, 82 `--jsons`: emitJSONS, 83 } 84 85 func main() { 86 if len(os.Args) > 1 { 87 switch os.Args[1] { 88 case `-h`, `--h`, `-help`, `--help`: 89 os.Stdout.WriteString(info[1:]) 90 return 91 } 92 } 93 94 args := os.Args[1:] 95 emit := emitJSON 96 if len(args) > 0 { 97 if v, ok := handlers[args[0]]; ok { 98 emit = v 99 args = args[1:] 100 } 101 } 102 103 if len(args) > 1 { 104 os.Stdout.WriteString(info[1:]) 105 os.Exit(1) 106 } 107 108 path := `-` 109 if len(args) > 0 { 110 path = args[0] 111 } 112 113 err := handleInput(os.Stdout, os.Stdin, path, emit) 114 if _, ok := err.(noMoreOutput); ok { 115 return 116 } 117 118 if err != nil { 119 os.Stderr.WriteString("\x1b[31m") 120 os.Stderr.WriteString(err.Error()) 121 os.Stderr.WriteString("\x1b[0m\n") 122 os.Exit(1) 123 } 124 } 125 126 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error { 127 bw := bufio.NewWriter(w) 128 defer bw.Flush() 129 130 if path == `-` { 131 return handle(bw, makeRowReader(r)) 132 } 133 134 f, err := os.Open(path) 135 if err != nil { 136 // on windows, file-not-found error messages may mention `CreateFile`, 137 // even when trying to open files in read-only mode 138 return errors.New(`can't open file named ` + path) 139 } 140 defer f.Close() 141 142 return handle(bw, makeRowReader(f)) 143 } 144 145 func emitJSON(w *bufio.Writer, rr *bufio.Scanner) error { 146 got := 0 147 var keys []string 148 149 err := loopTSV(rr, func(i int, row []string) error { 150 got = i 151 if i == 0 { 152 keys = make([]string, 0, len(row)) 153 for _, s := range row { 154 keys = append(keys, strings.Clone(s)) 155 } 156 return nil 157 } 158 159 if i == 1 { 160 w.WriteByte('[') 161 } else { 162 err := w.WriteByte(',') 163 if err != nil { 164 return noMoreOutput{} 165 } 166 } 167 168 w.WriteByte('{') 169 for i, s := range row { 170 if i > 0 { 171 w.WriteByte(',') 172 } 173 174 if numberLike(s) { 175 w.WriteByte('"') 176 writeInnerStringJSON(w, keys[i]) 177 w.WriteString(`":`) 178 w.WriteString(s) 179 continue 180 } 181 182 writeKV(w, keys[i], s) 183 } 184 185 for i := len(row); i < len(keys); i++ { 186 if i > 0 { 187 w.WriteByte(',') 188 } 189 w.WriteByte('"') 190 writeInnerStringJSON(w, keys[i]) 191 w.WriteString(`":null`) 192 } 193 w.WriteByte('}') 194 195 return nil 196 }) 197 198 if err != nil { 199 return err 200 } 201 202 if got > 0 { 203 w.WriteString("]\n") 204 } 205 return nil 206 } 207 208 func emitJSONL(w *bufio.Writer, rr *bufio.Scanner) error { 209 var keys []string 210 211 return loopTSV(rr, func(i int, row []string) error { 212 if i == 0 { 213 keys = make([]string, 0, len(row)) 214 for _, s := range row { 215 c := string(append([]byte{}, s...)) 216 keys = append(keys, c) 217 } 218 return nil 219 } 220 221 w.WriteByte('{') 222 for i, s := range row { 223 if i > 0 { 224 w.WriteByte(',') 225 w.WriteByte(' ') 226 } 227 228 if numberLike(s) { 229 w.WriteByte('"') 230 writeInnerStringJSON(w, keys[i]) 231 w.WriteString(`": `) 232 w.WriteString(s) 233 continue 234 } 235 236 writeKV(w, keys[i], s) 237 } 238 239 for i := len(row); i < len(keys); i++ { 240 if i > 0 { 241 w.WriteByte(',') 242 w.WriteByte(' ') 243 } 244 w.WriteByte('"') 245 writeInnerStringJSON(w, keys[i]) 246 w.WriteString(`": null`) 247 } 248 w.WriteByte('}') 249 250 w.WriteByte('\n') 251 if err := w.Flush(); err != nil { 252 return noMoreOutput{} 253 } 254 return nil 255 }) 256 } 257 258 func emitJSONS(w *bufio.Writer, rr *bufio.Scanner) error { 259 got := 0 260 var keys []string 261 262 err := loopTSV(rr, func(i int, row []string) error { 263 got = i 264 265 if i == 0 { 266 keys = make([]string, 0, len(row)) 267 for _, s := range row { 268 c := string(append([]byte{}, s...)) 269 keys = append(keys, c) 270 } 271 return nil 272 } 273 274 if i == 1 { 275 w.WriteByte('[') 276 } else { 277 err := w.WriteByte(',') 278 if err != nil { 279 return noMoreOutput{} 280 } 281 } 282 283 w.WriteByte('{') 284 for i, s := range row { 285 if i > 0 { 286 w.WriteByte(',') 287 } 288 writeKV(w, keys[i], s) 289 } 290 291 for i := len(row); i < len(keys); i++ { 292 if i > 0 { 293 w.WriteByte(',') 294 } 295 w.WriteByte('"') 296 writeInnerStringJSON(w, keys[i]) 297 w.WriteString(`":null`) 298 } 299 w.WriteByte('}') 300 301 return nil 302 }) 303 304 if err != nil { 305 return err 306 } 307 308 if got > 0 { 309 w.WriteString("]\n") 310 } 311 return nil 312 } 313 314 // writeInnerStringJSON helps JSON-encode strings more quickly 315 func writeInnerStringJSON(w *bufio.Writer, s string) { 316 needsEscaping := false 317 for _, r := range s { 318 if '#' <= r && r <= '~' && r != '\\' { 319 continue 320 } 321 if r == ' ' || r == '!' || unicode.IsLetter(r) { 322 continue 323 } 324 325 needsEscaping = true 326 break 327 } 328 329 if !needsEscaping { 330 w.WriteString(s) 331 return 332 } 333 334 outer, err := json.Marshal(s) 335 if err != nil { 336 return 337 } 338 inner := outer[1 : len(outer)-1] 339 w.Write(inner) 340 } 341 342 func writeKV(w *bufio.Writer, k string, s string) { 343 w.WriteByte('"') 344 writeInnerStringJSON(w, k) 345 w.WriteString(`": "`) 346 writeInnerStringJSON(w, s) 347 w.WriteByte('"') 348 } 349 350 func numberLike(s string) bool { 351 if len(s) == 0 { 352 return false 353 } 354 355 if s[0] == '-' { 356 s = s[1:] 357 } 358 359 if len(s) == 0 || s[0] < '0' || s[0] > '9' { 360 return false 361 } 362 363 for len(s) > 0 { 364 lead := s[0] 365 s = s[1:] 366 367 if lead == '.' { 368 return allDigits(s) 369 } 370 if lead < '0' || lead > '9' { 371 return false 372 } 373 } 374 375 return true 376 } 377 378 func allDigits(s string) bool { 379 if len(s) == 0 { 380 return false 381 } 382 383 for _, r := range s { 384 if r < '0' || r > '9' { 385 return false 386 } 387 } 388 return true 389 } 390 391 func makeRowReader(r io.Reader) *bufio.Scanner { 392 rr := bufio.NewScanner(r) 393 rr.Buffer(nil, 8*1024*1024*1024) 394 return rr 395 } 396 397 func loopTSV(rr *bufio.Scanner, handle func(i int, row []string) error) error { 398 i := 0 399 width := 0 400 var row []string 401 402 for rr.Scan() { 403 line := rr.Text() 404 if len(line) == 0 { 405 continue 406 } 407 408 if i == 0 { 409 width = len(row) 410 } 411 412 row = appendTSV(row[:0], line) 413 if len(row) > width { 414 return errors.New(`data-row has more items than the header`) 415 } 416 417 if err := handle(i, row); err != nil { 418 return err 419 } 420 i++ 421 } 422 423 return nil 424 } 425 426 func appendTSV(dst []string, row string) []string { 427 for len(dst) > 0 { 428 i := strings.IndexByte(row, '\t') 429 if i < 0 { 430 return append(dst, row) 431 } 432 433 dst = append(dst, row[:i]) 434 row = row[i+1:] 435 } 436 437 return dst 438 }