File: detsv.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath detsv.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40     "strings"
  41     "unicode"
  42 )
  43 
  44 const info = `
  45 detsv [options...] [filepath...]
  46 
  47 
  48 This cmd-line app turns TSV (tab-separated values) data into general either
  49 JSON (JavaScript Object Notation), JSONS (JSON Strings), or even JSONL (JSON
  50 Lines).
  51 
  52 When not given a filepath, the input is read from the standard input.
  53 
  54 Options, when given, can either start with a single or a double-dash:
  55 
  56     -h       show this help message
  57     -help    show this help message
  58 
  59     -json    emit JSON, where numbers are auto-detected
  60     -jsonl   emit JSON Lines, where numbers are auto-detected
  61     -jsons   emit JSON Strings, where object values are strings or null
  62 `
  63 
  64 // noMoreOutput is a custom error-type meant to be deliberately ignored
  65 type noMoreOutput struct{}
  66 
  67 func (nmo noMoreOutput) Error() string {
  68     return `no more output`
  69 }
  70 
  71 // handler is the type all TSV-converter funcs adhere to
  72 type handler func(*bufio.Writer, *bufio.Scanner) error
  73 
  74 var handlers = map[string]handler{
  75     `-json`:   emitJSON,
  76     `--json`:  emitJSON,
  77     `-jsonl`:  emitJSONL,
  78     `--jsonl`: emitJSONL,
  79     `-jsons`:  emitJSONS,
  80     `--jsons`: emitJSONS,
  81 }
  82 
  83 func main() {
  84     if len(os.Args) > 1 {
  85         switch os.Args[1] {
  86         case `-h`, `--h`, `-help`, `--help`:
  87             os.Stdout.WriteString(info[1:])
  88             return
  89         }
  90     }
  91 
  92     args := os.Args[1:]
  93     emit := emitJSON
  94     if len(args) > 0 {
  95         if v, ok := handlers[args[0]]; ok {
  96             emit = v
  97             args = args[1:]
  98         }
  99     }
 100 
 101     if len(args) > 1 {
 102         os.Stdout.WriteString(info[1:])
 103         os.Exit(1)
 104     }
 105 
 106     path := `-`
 107     if len(args) > 0 {
 108         path = args[0]
 109     }
 110 
 111     err := handleInput(os.Stdout, os.Stdin, path, emit)
 112     if _, ok := err.(noMoreOutput); ok {
 113         return
 114     }
 115 
 116     if err != nil {
 117         os.Stderr.WriteString("\x1b[31m")
 118         os.Stderr.WriteString(err.Error())
 119         os.Stderr.WriteString("\x1b[0m\n")
 120         os.Exit(1)
 121     }
 122 }
 123 
 124 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
 125     bw := bufio.NewWriter(w)
 126     defer bw.Flush()
 127 
 128     if path == `-` {
 129         return handle(bw, makeRowReader(r))
 130     }
 131 
 132     f, err := os.Open(path)
 133     if err != nil {
 134         // on windows, file-not-found error messages may mention `CreateFile`,
 135         // even when trying to open files in read-only mode
 136         return errors.New(`can't open file named ` + path)
 137     }
 138     defer f.Close()
 139 
 140     return handle(bw, makeRowReader(f))
 141 }
 142 
 143 func emitJSON(w *bufio.Writer, rr *bufio.Scanner) error {
 144     got := 0
 145     var keys []string
 146 
 147     err := loopTSV(rr, func(i int, row []string) error {
 148         got = i
 149         if i == 0 {
 150             keys = make([]string, 0, len(row))
 151             for _, s := range row {
 152                 keys = append(keys, strings.Clone(s))
 153             }
 154             return nil
 155         }
 156 
 157         if i == 1 {
 158             w.WriteByte('[')
 159         } else {
 160             err := w.WriteByte(',')
 161             if err != nil {
 162                 return noMoreOutput{}
 163             }
 164         }
 165 
 166         w.WriteByte('{')
 167         for i, s := range row {
 168             if i > 0 {
 169                 w.WriteByte(',')
 170             }
 171 
 172             if numberLike(s) {
 173                 w.WriteByte('"')
 174                 writeInnerStringJSON(w, keys[i])
 175                 w.WriteString(`":`)
 176                 w.WriteString(s)
 177                 continue
 178             }
 179 
 180             writeKV(w, keys[i], s)
 181         }
 182 
 183         for i := len(row); i < len(keys); i++ {
 184             if i > 0 {
 185                 w.WriteByte(',')
 186             }
 187             w.WriteByte('"')
 188             writeInnerStringJSON(w, keys[i])
 189             w.WriteString(`":null`)
 190         }
 191         w.WriteByte('}')
 192 
 193         return nil
 194     })
 195 
 196     if err != nil {
 197         return err
 198     }
 199 
 200     if got > 0 {
 201         w.WriteString("]\n")
 202     }
 203     return nil
 204 }
 205 
 206 func emitJSONL(w *bufio.Writer, rr *bufio.Scanner) error {
 207     var keys []string
 208 
 209     return loopTSV(rr, func(i int, row []string) error {
 210         if i == 0 {
 211             keys = make([]string, 0, len(row))
 212             for _, s := range row {
 213                 c := string(append([]byte{}, s...))
 214                 keys = append(keys, c)
 215             }
 216             return nil
 217         }
 218 
 219         w.WriteByte('{')
 220         for i, s := range row {
 221             if i > 0 {
 222                 w.WriteByte(',')
 223                 w.WriteByte(' ')
 224             }
 225 
 226             if numberLike(s) {
 227                 w.WriteByte('"')
 228                 writeInnerStringJSON(w, keys[i])
 229                 w.WriteString(`": `)
 230                 w.WriteString(s)
 231                 continue
 232             }
 233 
 234             writeKV(w, keys[i], s)
 235         }
 236 
 237         for i := len(row); i < len(keys); i++ {
 238             if i > 0 {
 239                 w.WriteByte(',')
 240                 w.WriteByte(' ')
 241             }
 242             w.WriteByte('"')
 243             writeInnerStringJSON(w, keys[i])
 244             w.WriteString(`": null`)
 245         }
 246         w.WriteByte('}')
 247 
 248         w.WriteByte('\n')
 249         if err := w.Flush(); err != nil {
 250             return noMoreOutput{}
 251         }
 252         return nil
 253     })
 254 }
 255 
 256 func emitJSONS(w *bufio.Writer, rr *bufio.Scanner) error {
 257     got := 0
 258     var keys []string
 259 
 260     err := loopTSV(rr, func(i int, row []string) error {
 261         got = i
 262 
 263         if i == 0 {
 264             keys = make([]string, 0, len(row))
 265             for _, s := range row {
 266                 c := string(append([]byte{}, s...))
 267                 keys = append(keys, c)
 268             }
 269             return nil
 270         }
 271 
 272         if i == 1 {
 273             w.WriteByte('[')
 274         } else {
 275             err := w.WriteByte(',')
 276             if err != nil {
 277                 return noMoreOutput{}
 278             }
 279         }
 280 
 281         w.WriteByte('{')
 282         for i, s := range row {
 283             if i > 0 {
 284                 w.WriteByte(',')
 285             }
 286             writeKV(w, keys[i], s)
 287         }
 288 
 289         for i := len(row); i < len(keys); i++ {
 290             if i > 0 {
 291                 w.WriteByte(',')
 292             }
 293             w.WriteByte('"')
 294             writeInnerStringJSON(w, keys[i])
 295             w.WriteString(`":null`)
 296         }
 297         w.WriteByte('}')
 298 
 299         return nil
 300     })
 301 
 302     if err != nil {
 303         return err
 304     }
 305 
 306     if got > 0 {
 307         w.WriteString("]\n")
 308     }
 309     return nil
 310 }
 311 
 312 // writeInnerStringJSON helps JSON-encode strings more quickly
 313 func writeInnerStringJSON(w *bufio.Writer, s string) {
 314     needsEscaping := false
 315     for _, r := range s {
 316         if '#' <= r && r <= '~' && r != '\\' {
 317             continue
 318         }
 319         if r == ' ' || r == '!' || unicode.IsLetter(r) {
 320             continue
 321         }
 322 
 323         needsEscaping = true
 324         break
 325     }
 326 
 327     if !needsEscaping {
 328         w.WriteString(s)
 329         return
 330     }
 331 
 332     outer, err := json.Marshal(s)
 333     if err != nil {
 334         return
 335     }
 336     inner := outer[1 : len(outer)-1]
 337     w.Write(inner)
 338 }
 339 
 340 func writeKV(w *bufio.Writer, k string, s string) {
 341     w.WriteByte('"')
 342     writeInnerStringJSON(w, k)
 343     w.WriteString(`": "`)
 344     writeInnerStringJSON(w, s)
 345     w.WriteByte('"')
 346 }
 347 
 348 func numberLike(s string) bool {
 349     if len(s) == 0 {
 350         return false
 351     }
 352 
 353     if s[0] == '-' {
 354         s = s[1:]
 355     }
 356 
 357     if len(s) == 0 || s[0] < '0' || s[0] > '9' {
 358         return false
 359     }
 360 
 361     for len(s) > 0 {
 362         lead := s[0]
 363         s = s[1:]
 364 
 365         if lead == '.' {
 366             return allDigits(s)
 367         }
 368         if lead < '0' || lead > '9' {
 369             return false
 370         }
 371     }
 372 
 373     return true
 374 }
 375 
 376 func allDigits(s string) bool {
 377     if len(s) == 0 {
 378         return false
 379     }
 380 
 381     for _, r := range s {
 382         if r < '0' || r > '9' {
 383             return false
 384         }
 385     }
 386     return true
 387 }
 388 
 389 func makeRowReader(r io.Reader) *bufio.Scanner {
 390     rr := bufio.NewScanner(r)
 391     rr.Buffer(nil, 8*1024*1024*1024)
 392     return rr
 393 }
 394 
 395 func loopTSV(rr *bufio.Scanner, handle func(i int, row []string) error) error {
 396     i := 0
 397     width := 0
 398     var row []string
 399 
 400     for rr.Scan() {
 401         line := rr.Text()
 402         if len(line) == 0 {
 403             continue
 404         }
 405 
 406         if i == 0 {
 407             width = len(row)
 408         }
 409 
 410         row = appendTSV(row[:0], line)
 411         if len(row) > width {
 412             return errors.New(`data-row has more items than the header`)
 413         }
 414 
 415         if err := handle(i, row); err != nil {
 416             return err
 417         }
 418         i++
 419     }
 420 
 421     return nil
 422 }
 423 
 424 func appendTSV(dst []string, row string) []string {
 425     for len(dst) > 0 {
 426         i := strings.IndexByte(row, '\t')
 427         if i < 0 {
 428             return append(dst, row)
 429         }
 430 
 431         dst = append(dst, row[:i])
 432         row = row[i+1:]
 433     }
 434 
 435     return dst
 436 }