File: detsv.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for detsv.
  27 
  28 To compile a smaller-sized command-line app, you can use the `go` command as
  29 follows:
  30 
  31 go build -ldflags "-s -w" -trimpath detsv.go
  32 */
  33 
  34 package main
  35 
  36 import (
  37     "bufio"
  38     "encoding/json"
  39     "errors"
  40     "io"
  41     "os"
  42     "strings"
  43     "unicode"
  44 )
  45 
  46 const info = `
  47 detsv [options...] [filepath...]
  48 
  49 
  50 This cmd-line app turns TSV (tab-separated values) data into general either
  51 JSON (JavaScript Object Notation), JSONS (JSON Strings), or even JSONL (JSON
  52 Lines).
  53 
  54 When not given a filepath, the input is read from the standard input.
  55 
  56 Options, when given, can either start with a single or a double-dash:
  57 
  58     -h       show this help message
  59     -help    show this help message
  60 
  61     -json    emit JSON, where numbers are auto-detected
  62     -jsonl   emit JSON Lines, where numbers are auto-detected
  63     -jsons   emit JSON Strings, where object values are strings or null
  64 `
  65 
  66 // noMoreOutput is a custom error-type meant to be deliberately ignored
  67 type noMoreOutput struct{}
  68 
  69 func (nmo noMoreOutput) Error() string {
  70     return `no more output`
  71 }
  72 
  73 // handler is the type all TSV-converter funcs adhere to
  74 type handler func(*bufio.Writer, *bufio.Scanner) error
  75 
  76 var handlers = map[string]handler{
  77     `-json`:   emitJSON,
  78     `--json`:  emitJSON,
  79     `-jsonl`:  emitJSONL,
  80     `--jsonl`: emitJSONL,
  81     `-jsons`:  emitJSONS,
  82     `--jsons`: emitJSONS,
  83 }
  84 
  85 func main() {
  86     if len(os.Args) > 1 {
  87         switch os.Args[1] {
  88         case `-h`, `--h`, `-help`, `--help`:
  89             os.Stdout.WriteString(info[1:])
  90             return
  91         }
  92     }
  93 
  94     args := os.Args[1:]
  95     emit := emitJSON
  96     if len(args) > 0 {
  97         if v, ok := handlers[args[0]]; ok {
  98             emit = v
  99             args = args[1:]
 100         }
 101     }
 102 
 103     if len(args) > 1 {
 104         os.Stdout.WriteString(info[1:])
 105         os.Exit(1)
 106     }
 107 
 108     path := `-`
 109     if len(args) > 0 {
 110         path = args[0]
 111     }
 112 
 113     err := handleInput(os.Stdout, os.Stdin, path, emit)
 114     if _, ok := err.(noMoreOutput); ok {
 115         return
 116     }
 117 
 118     if err != nil {
 119         os.Stderr.WriteString("\x1b[31m")
 120         os.Stderr.WriteString(err.Error())
 121         os.Stderr.WriteString("\x1b[0m\n")
 122         os.Exit(1)
 123     }
 124 }
 125 
 126 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
 127     bw := bufio.NewWriter(w)
 128     defer bw.Flush()
 129 
 130     if path == `-` {
 131         return handle(bw, makeRowReader(r))
 132     }
 133 
 134     f, err := os.Open(path)
 135     if err != nil {
 136         // on windows, file-not-found error messages may mention `CreateFile`,
 137         // even when trying to open files in read-only mode
 138         return errors.New(`can't open file named ` + path)
 139     }
 140     defer f.Close()
 141 
 142     return handle(bw, makeRowReader(f))
 143 }
 144 
 145 func emitJSON(w *bufio.Writer, rr *bufio.Scanner) error {
 146     got := 0
 147     var keys []string
 148 
 149     err := loopTSV(rr, func(i int, row []string) error {
 150         got = i
 151         if i == 0 {
 152             keys = make([]string, 0, len(row))
 153             for _, s := range row {
 154                 keys = append(keys, strings.Clone(s))
 155             }
 156             return nil
 157         }
 158 
 159         if i == 1 {
 160             w.WriteByte('[')
 161         } else {
 162             err := w.WriteByte(',')
 163             if err != nil {
 164                 return noMoreOutput{}
 165             }
 166         }
 167 
 168         w.WriteByte('{')
 169         for i, s := range row {
 170             if i > 0 {
 171                 w.WriteByte(',')
 172             }
 173 
 174             if numberLike(s) {
 175                 w.WriteByte('"')
 176                 writeInnerStringJSON(w, keys[i])
 177                 w.WriteString(`":`)
 178                 w.WriteString(s)
 179                 continue
 180             }
 181 
 182             writeKV(w, keys[i], s)
 183         }
 184 
 185         for i := len(row); i < len(keys); i++ {
 186             if i > 0 {
 187                 w.WriteByte(',')
 188             }
 189             w.WriteByte('"')
 190             writeInnerStringJSON(w, keys[i])
 191             w.WriteString(`":null`)
 192         }
 193         w.WriteByte('}')
 194 
 195         return nil
 196     })
 197 
 198     if err != nil {
 199         return err
 200     }
 201 
 202     if got > 0 {
 203         w.WriteString("]\n")
 204     }
 205     return nil
 206 }
 207 
 208 func emitJSONL(w *bufio.Writer, rr *bufio.Scanner) error {
 209     var keys []string
 210 
 211     return loopTSV(rr, func(i int, row []string) error {
 212         if i == 0 {
 213             keys = make([]string, 0, len(row))
 214             for _, s := range row {
 215                 c := string(append([]byte{}, s...))
 216                 keys = append(keys, c)
 217             }
 218             return nil
 219         }
 220 
 221         w.WriteByte('{')
 222         for i, s := range row {
 223             if i > 0 {
 224                 w.WriteByte(',')
 225                 w.WriteByte(' ')
 226             }
 227 
 228             if numberLike(s) {
 229                 w.WriteByte('"')
 230                 writeInnerStringJSON(w, keys[i])
 231                 w.WriteString(`": `)
 232                 w.WriteString(s)
 233                 continue
 234             }
 235 
 236             writeKV(w, keys[i], s)
 237         }
 238 
 239         for i := len(row); i < len(keys); i++ {
 240             if i > 0 {
 241                 w.WriteByte(',')
 242                 w.WriteByte(' ')
 243             }
 244             w.WriteByte('"')
 245             writeInnerStringJSON(w, keys[i])
 246             w.WriteString(`": null`)
 247         }
 248         w.WriteByte('}')
 249 
 250         w.WriteByte('\n')
 251         if err := w.Flush(); err != nil {
 252             return noMoreOutput{}
 253         }
 254         return nil
 255     })
 256 }
 257 
 258 func emitJSONS(w *bufio.Writer, rr *bufio.Scanner) error {
 259     got := 0
 260     var keys []string
 261 
 262     err := loopTSV(rr, func(i int, row []string) error {
 263         got = i
 264 
 265         if i == 0 {
 266             keys = make([]string, 0, len(row))
 267             for _, s := range row {
 268                 c := string(append([]byte{}, s...))
 269                 keys = append(keys, c)
 270             }
 271             return nil
 272         }
 273 
 274         if i == 1 {
 275             w.WriteByte('[')
 276         } else {
 277             err := w.WriteByte(',')
 278             if err != nil {
 279                 return noMoreOutput{}
 280             }
 281         }
 282 
 283         w.WriteByte('{')
 284         for i, s := range row {
 285             if i > 0 {
 286                 w.WriteByte(',')
 287             }
 288             writeKV(w, keys[i], s)
 289         }
 290 
 291         for i := len(row); i < len(keys); i++ {
 292             if i > 0 {
 293                 w.WriteByte(',')
 294             }
 295             w.WriteByte('"')
 296             writeInnerStringJSON(w, keys[i])
 297             w.WriteString(`":null`)
 298         }
 299         w.WriteByte('}')
 300 
 301         return nil
 302     })
 303 
 304     if err != nil {
 305         return err
 306     }
 307 
 308     if got > 0 {
 309         w.WriteString("]\n")
 310     }
 311     return nil
 312 }
 313 
 314 // writeInnerStringJSON helps JSON-encode strings more quickly
 315 func writeInnerStringJSON(w *bufio.Writer, s string) {
 316     needsEscaping := false
 317     for _, r := range s {
 318         if '#' <= r && r <= '~' && r != '\\' {
 319             continue
 320         }
 321         if r == ' ' || r == '!' || unicode.IsLetter(r) {
 322             continue
 323         }
 324 
 325         needsEscaping = true
 326         break
 327     }
 328 
 329     if !needsEscaping {
 330         w.WriteString(s)
 331         return
 332     }
 333 
 334     outer, err := json.Marshal(s)
 335     if err != nil {
 336         return
 337     }
 338     inner := outer[1 : len(outer)-1]
 339     w.Write(inner)
 340 }
 341 
 342 func writeKV(w *bufio.Writer, k string, s string) {
 343     w.WriteByte('"')
 344     writeInnerStringJSON(w, k)
 345     w.WriteString(`": "`)
 346     writeInnerStringJSON(w, s)
 347     w.WriteByte('"')
 348 }
 349 
 350 func numberLike(s string) bool {
 351     if len(s) == 0 {
 352         return false
 353     }
 354 
 355     if s[0] == '-' {
 356         s = s[1:]
 357     }
 358 
 359     if len(s) == 0 || s[0] < '0' || s[0] > '9' {
 360         return false
 361     }
 362 
 363     for len(s) > 0 {
 364         lead := s[0]
 365         s = s[1:]
 366 
 367         if lead == '.' {
 368             return allDigits(s)
 369         }
 370         if lead < '0' || lead > '9' {
 371             return false
 372         }
 373     }
 374 
 375     return true
 376 }
 377 
 378 func allDigits(s string) bool {
 379     if len(s) == 0 {
 380         return false
 381     }
 382 
 383     for _, r := range s {
 384         if r < '0' || r > '9' {
 385             return false
 386         }
 387     }
 388     return true
 389 }
 390 
 391 func makeRowReader(r io.Reader) *bufio.Scanner {
 392     rr := bufio.NewScanner(r)
 393     rr.Buffer(nil, 8*1024*1024*1024)
 394     return rr
 395 }
 396 
 397 func loopTSV(rr *bufio.Scanner, handle func(i int, row []string) error) error {
 398     i := 0
 399     width := 0
 400     var row []string
 401 
 402     for rr.Scan() {
 403         line := rr.Text()
 404         if len(line) == 0 {
 405             continue
 406         }
 407 
 408         if i == 0 {
 409             width = len(row)
 410         }
 411 
 412         row = appendTSV(row[:0], line)
 413         if len(row) > width {
 414             return errors.New(`data-row has more items than the header`)
 415         }
 416 
 417         if err := handle(i, row); err != nil {
 418             return err
 419         }
 420         i++
 421     }
 422 
 423     return nil
 424 }
 425 
 426 func appendTSV(dst []string, row string) []string {
 427     for len(dst) > 0 {
 428         i := strings.IndexByte(row, '\t')
 429         if i < 0 {
 430             return append(dst, row)
 431         }
 432 
 433         dst = append(dst, row[:i])
 434         row = row[i+1:]
 435     }
 436 
 437     return dst
 438 }