File: detsv.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath detsv.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40     "strings"
  41     "unicode"
  42 )
  43 
  44 const info = `
  45 detsv [options...] [filepath...]
  46 
  47 
  48 This cmd-line app turns TSV (tab-separated values) data into general either
  49 JSON (JavaScript Object Notation), JSONS (JSON Strings), or even JSONL (JSON
  50 Lines).
  51 
  52 When not given a filepath, the input is read from the standard input.
  53 
  54 Options, when given, can either start with a single or a double-dash:
  55 
  56     -h       show this help message
  57     -help    show this help message
  58 
  59     -json    emit JSON, where numbers are auto-detected
  60     -jsonl   emit JSON Lines, where numbers are auto-detected
  61     -jsons   emit JSON Strings, where object values are strings or null
  62 `
  63 
  64 // noMoreOutput is a custom error-type meant to be deliberately ignored
  65 type noMoreOutput struct{}
  66 
  67 func (nmo noMoreOutput) Error() string {
  68     return `no more output`
  69 }
  70 
  71 // handler is the type all TSV-converter funcs adhere to
  72 type handler func(*bufio.Writer, *bufio.Scanner) error
  73 
  74 var handlers = map[string]handler{
  75     `-json`:   emitJSON,
  76     `--json`:  emitJSON,
  77     `-jsonl`:  emitJSONL,
  78     `--jsonl`: emitJSONL,
  79     `-jsons`:  emitJSONS,
  80     `--jsons`: emitJSONS,
  81 }
  82 
  83 func main() {
  84     if len(os.Args) > 1 {
  85         switch os.Args[1] {
  86         case `-h`, `--h`, `-help`, `--help`:
  87             os.Stdout.WriteString(info[1:])
  88             return
  89         }
  90     }
  91 
  92     args := os.Args[1:]
  93     emit := emitJSON
  94     if len(args) > 0 {
  95         if v, ok := handlers[args[0]]; ok {
  96             emit = v
  97             args = args[1:]
  98         }
  99     }
 100 
 101     if len(args) > 1 {
 102         os.Stdout.WriteString(info[1:])
 103         os.Exit(1)
 104     }
 105 
 106     path := `-`
 107     if len(args) > 0 {
 108         path = args[0]
 109     }
 110 
 111     err := handleInput(os.Stdout, os.Stdin, path, emit)
 112     if _, ok := err.(noMoreOutput); ok {
 113         return
 114     }
 115 
 116     if err != nil {
 117         os.Stderr.WriteString(err.Error())
 118         os.Stderr.WriteString("\n")
 119         os.Exit(1)
 120     }
 121 }
 122 
 123 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
 124     bw := bufio.NewWriter(w)
 125     defer bw.Flush()
 126 
 127     if path == `-` {
 128         return handle(bw, makeRowReader(r))
 129     }
 130 
 131     f, err := os.Open(path)
 132     if err != nil {
 133         // on windows, file-not-found error messages may mention `CreateFile`,
 134         // even when trying to open files in read-only mode
 135         return errors.New(`can't open file named ` + path)
 136     }
 137     defer f.Close()
 138 
 139     return handle(bw, makeRowReader(f))
 140 }
 141 
 142 func emitJSON(w *bufio.Writer, rr *bufio.Scanner) error {
 143     got := 0
 144     var keys []string
 145 
 146     err := loopTSV(rr, func(i int, row []string) error {
 147         got = i
 148         if i == 0 {
 149             keys = make([]string, 0, len(row))
 150             for _, s := range row {
 151                 keys = append(keys, strings.Clone(s))
 152             }
 153             return nil
 154         }
 155 
 156         if i == 1 {
 157             w.WriteByte('[')
 158         } else {
 159             err := w.WriteByte(',')
 160             if err != nil {
 161                 return noMoreOutput{}
 162             }
 163         }
 164 
 165         w.WriteByte('{')
 166         for i, s := range row {
 167             if i > 0 {
 168                 w.WriteByte(',')
 169             }
 170 
 171             if numberLike(s) {
 172                 w.WriteByte('"')
 173                 writeInnerStringJSON(w, keys[i])
 174                 w.WriteString(`":`)
 175                 w.WriteString(s)
 176                 continue
 177             }
 178 
 179             writeKV(w, keys[i], s)
 180         }
 181 
 182         for i := len(row); i < len(keys); i++ {
 183             if i > 0 {
 184                 w.WriteByte(',')
 185             }
 186             w.WriteByte('"')
 187             writeInnerStringJSON(w, keys[i])
 188             w.WriteString(`":null`)
 189         }
 190         w.WriteByte('}')
 191 
 192         return nil
 193     })
 194 
 195     if err != nil {
 196         return err
 197     }
 198 
 199     if got > 0 {
 200         w.WriteString("]\n")
 201     }
 202     return nil
 203 }
 204 
 205 func emitJSONL(w *bufio.Writer, rr *bufio.Scanner) error {
 206     var keys []string
 207 
 208     return loopTSV(rr, func(i int, row []string) error {
 209         if i == 0 {
 210             keys = make([]string, 0, len(row))
 211             for _, s := range row {
 212                 c := string(append([]byte{}, s...))
 213                 keys = append(keys, c)
 214             }
 215             return nil
 216         }
 217 
 218         w.WriteByte('{')
 219         for i, s := range row {
 220             if i > 0 {
 221                 w.WriteByte(',')
 222                 w.WriteByte(' ')
 223             }
 224 
 225             if numberLike(s) {
 226                 w.WriteByte('"')
 227                 writeInnerStringJSON(w, keys[i])
 228                 w.WriteString(`": `)
 229                 w.WriteString(s)
 230                 continue
 231             }
 232 
 233             writeKV(w, keys[i], s)
 234         }
 235 
 236         for i := len(row); i < len(keys); i++ {
 237             if i > 0 {
 238                 w.WriteByte(',')
 239                 w.WriteByte(' ')
 240             }
 241             w.WriteByte('"')
 242             writeInnerStringJSON(w, keys[i])
 243             w.WriteString(`": null`)
 244         }
 245         w.WriteByte('}')
 246 
 247         w.WriteByte('\n')
 248         if err := w.Flush(); err != nil {
 249             return noMoreOutput{}
 250         }
 251         return nil
 252     })
 253 }
 254 
 255 func emitJSONS(w *bufio.Writer, rr *bufio.Scanner) error {
 256     got := 0
 257     var keys []string
 258 
 259     err := loopTSV(rr, func(i int, row []string) error {
 260         got = i
 261 
 262         if i == 0 {
 263             keys = make([]string, 0, len(row))
 264             for _, s := range row {
 265                 c := string(append([]byte{}, s...))
 266                 keys = append(keys, c)
 267             }
 268             return nil
 269         }
 270 
 271         if i == 1 {
 272             w.WriteByte('[')
 273         } else {
 274             err := w.WriteByte(',')
 275             if err != nil {
 276                 return noMoreOutput{}
 277             }
 278         }
 279 
 280         w.WriteByte('{')
 281         for i, s := range row {
 282             if i > 0 {
 283                 w.WriteByte(',')
 284             }
 285             writeKV(w, keys[i], s)
 286         }
 287 
 288         for i := len(row); i < len(keys); i++ {
 289             if i > 0 {
 290                 w.WriteByte(',')
 291             }
 292             w.WriteByte('"')
 293             writeInnerStringJSON(w, keys[i])
 294             w.WriteString(`":null`)
 295         }
 296         w.WriteByte('}')
 297 
 298         return nil
 299     })
 300 
 301     if err != nil {
 302         return err
 303     }
 304 
 305     if got > 0 {
 306         w.WriteString("]\n")
 307     }
 308     return nil
 309 }
 310 
 311 // writeInnerStringJSON helps JSON-encode strings more quickly
 312 func writeInnerStringJSON(w *bufio.Writer, s string) {
 313     needsEscaping := false
 314     for _, r := range s {
 315         if '#' <= r && r <= '~' && r != '\\' {
 316             continue
 317         }
 318         if r == ' ' || r == '!' || unicode.IsLetter(r) {
 319             continue
 320         }
 321 
 322         needsEscaping = true
 323         break
 324     }
 325 
 326     if !needsEscaping {
 327         w.WriteString(s)
 328         return
 329     }
 330 
 331     outer, err := json.Marshal(s)
 332     if err != nil {
 333         return
 334     }
 335     inner := outer[1 : len(outer)-1]
 336     w.Write(inner)
 337 }
 338 
 339 func writeKV(w *bufio.Writer, k string, s string) {
 340     w.WriteByte('"')
 341     writeInnerStringJSON(w, k)
 342     w.WriteString(`": "`)
 343     writeInnerStringJSON(w, s)
 344     w.WriteByte('"')
 345 }
 346 
 347 func numberLike(s string) bool {
 348     if len(s) == 0 {
 349         return false
 350     }
 351 
 352     if s[0] == '-' {
 353         s = s[1:]
 354     }
 355 
 356     if len(s) == 0 || s[0] < '0' || s[0] > '9' {
 357         return false
 358     }
 359 
 360     for len(s) > 0 {
 361         lead := s[0]
 362         s = s[1:]
 363 
 364         if lead == '.' {
 365             return allDigits(s)
 366         }
 367         if lead < '0' || lead > '9' {
 368             return false
 369         }
 370     }
 371 
 372     return true
 373 }
 374 
 375 func allDigits(s string) bool {
 376     if len(s) == 0 {
 377         return false
 378     }
 379 
 380     for _, r := range s {
 381         if r < '0' || r > '9' {
 382             return false
 383         }
 384     }
 385     return true
 386 }
 387 
 388 func makeRowReader(r io.Reader) *bufio.Scanner {
 389     rr := bufio.NewScanner(r)
 390     rr.Buffer(nil, 8*1024*1024*1024)
 391     return rr
 392 }
 393 
 394 func loopTSV(rr *bufio.Scanner, handle func(i int, row []string) error) error {
 395     i := 0
 396     width := 0
 397     var row []string
 398 
 399     for rr.Scan() {
 400         line := rr.Text()
 401         if len(line) == 0 {
 402             continue
 403         }
 404 
 405         if i == 0 {
 406             width = len(row)
 407         }
 408 
 409         row = appendTSV(row[:0], line)
 410         if len(row) > width {
 411             return errors.New(`data-row has more items than the header`)
 412         }
 413 
 414         if err := handle(i, row); err != nil {
 415             return err
 416         }
 417         i++
 418     }
 419 
 420     return nil
 421 }
 422 
 423 func appendTSV(dst []string, row string) []string {
 424     for len(dst) > 0 {
 425         i := strings.IndexByte(row, '\t')
 426         if i < 0 {
 427             return append(dst, row)
 428         }
 429 
 430         dst = append(dst, row[:i])
 431         row = row[i+1:]
 432     }
 433 
 434     return dst
 435 }