File: decsv.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath decsv.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/csv"
  37     "encoding/json"
  38     "errors"
  39     "io"
  40     "os"
  41     "strings"
  42     "unicode"
  43 )
  44 
  45 const info = `
  46 decsv [options...] [filepath...]
  47 
  48 
  49 This cmd-line app turns CSV (comma-separated values) data into either TSV
  50 (tab-separated values), JSONS (JSON Strings), or general JSON (JavaScript
  51 Object Notation).
  52 
  53 When not given a filepath, the input is read from the standard input.
  54 
  55 Options, when given, can either start with a single or a double-dash:
  56 
  57     -h       show this help message
  58     -help    show this help message
  59 
  60     -json    emit JSON, where numbers are auto-detected
  61     -jsonl   emit JSON Lines, where numbers are auto-detected
  62     -jsons   emit JSON Strings, where object values are strings or null
  63     -tsv     emit TSV (tab-separated values) lines
  64 `
  65 
  66 // noMoreOutput is a custom error-type meant to be deliberately ignored
  67 type noMoreOutput struct{}
  68 
  69 func (nmo noMoreOutput) Error() string {
  70     return `no more output`
  71 }
  72 
// handler is the type all CSV-converter funcs adhere to: each one reads rows
// from the CSV reader given, and writes its converted output to the buffered
// writer given
type handler func(*bufio.Writer, *csv.Reader) error

// handlers maps each output-format option, both in its single-dash and in
// its double-dash form, to the func which emits that format
var handlers = map[string]handler{
    `-json`:   emitJSON,
    `--json`:  emitJSON,
    `-jsonl`:  emitJSONL,
    `--jsonl`: emitJSONL,
    `-jsons`:  emitJSONS,
    `--jsons`: emitJSONS,
    `-tsv`:    emitTSV,
    `--tsv`:   emitTSV,
}
  86 
  87 func main() {
  88     if len(os.Args) > 1 {
  89         switch os.Args[1] {
  90         case `-h`, `--h`, `-help`, `--help`:
  91             os.Stdout.WriteString(info[1:])
  92             return
  93         }
  94     }
  95 
  96     args := os.Args[1:]
  97     emit := emitTSV
  98     if len(args) > 0 {
  99         if v, ok := handlers[args[0]]; ok {
 100             emit = v
 101             args = args[1:]
 102         }
 103     }
 104 
 105     if len(args) > 1 {
 106         os.Stdout.WriteString(info[1:])
 107         os.Exit(1)
 108     }
 109 
 110     path := `-`
 111     if len(args) > 0 {
 112         path = args[0]
 113     }
 114 
 115     err := handleInput(os.Stdout, os.Stdin, path, emit)
 116     if _, ok := err.(noMoreOutput); ok {
 117         return
 118     }
 119 
 120     if err != nil {
 121         os.Stderr.WriteString(err.Error())
 122         os.Stderr.WriteString("\n")
 123         os.Exit(1)
 124     }
 125 }
 126 
 127 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
 128     bw := bufio.NewWriter(w)
 129     defer bw.Flush()
 130 
 131     if path == `-` {
 132         return handle(bw, makeRowReader(r))
 133     }
 134 
 135     f, err := os.Open(path)
 136     if err != nil {
 137         // on windows, file-not-found error messages may mention `CreateFile`,
 138         // even when trying to open files in read-only mode
 139         return errors.New(`can't open file named ` + path)
 140     }
 141     defer f.Close()
 142 
 143     return handle(bw, makeRowReader(f))
 144 }
 145 
 146 func emitJSON(w *bufio.Writer, rr *csv.Reader) error {
 147     got := 0
 148     var keys []string
 149 
 150     err := loopCSV(rr, func(i int, row []string) error {
 151         got++
 152 
 153         if i == 0 {
 154             keys = make([]string, 0, len(row))
 155             for _, s := range row {
 156                 keys = append(keys, strings.Clone(s))
 157             }
 158             return nil
 159         }
 160 
 161         if i == 1 {
 162             w.WriteByte('[')
 163         } else {
 164             err := w.WriteByte(',')
 165             if err != nil {
 166                 return noMoreOutput{}
 167             }
 168         }
 169 
 170         w.WriteByte('{')
 171         for i, s := range row {
 172             if i > 0 {
 173                 w.WriteByte(',')
 174             }
 175 
 176             if numberLike(s) {
 177                 w.WriteByte('"')
 178                 writeInnerStringJSON(w, keys[i])
 179                 w.WriteString(`":`)
 180                 w.WriteString(s)
 181                 continue
 182             }
 183 
 184             writeKV(w, keys[i], s)
 185         }
 186 
 187         for i := len(row); i < len(keys); i++ {
 188             if i > 0 {
 189                 w.WriteByte(',')
 190             }
 191             w.WriteByte('"')
 192             writeInnerStringJSON(w, keys[i])
 193             w.WriteString(`":null`)
 194         }
 195         w.WriteByte('}')
 196 
 197         return nil
 198     })
 199 
 200     if err != nil {
 201         return err
 202     }
 203 
 204     if got > 1 {
 205         w.WriteString("]\n")
 206     }
 207     return nil
 208 }
 209 
 210 func emitJSONL(w *bufio.Writer, rr *csv.Reader) error {
 211     var keys []string
 212 
 213     return loopCSV(rr, func(i int, row []string) error {
 214         if i == 0 {
 215             keys = make([]string, 0, len(row))
 216             for _, s := range row {
 217                 c := string(append([]byte{}, s...))
 218                 keys = append(keys, c)
 219             }
 220             return nil
 221         }
 222 
 223         w.WriteByte('{')
 224         for i, s := range row {
 225             if i > 0 {
 226                 w.WriteByte(',')
 227                 w.WriteByte(' ')
 228             }
 229 
 230             if numberLike(s) {
 231                 w.WriteByte('"')
 232                 writeInnerStringJSON(w, keys[i])
 233                 w.WriteString(`": `)
 234                 w.WriteString(s)
 235                 continue
 236             }
 237 
 238             writeKV(w, keys[i], s)
 239         }
 240 
 241         for i := len(row); i < len(keys); i++ {
 242             if i > 0 {
 243                 w.WriteByte(',')
 244                 w.WriteByte(' ')
 245             }
 246             w.WriteByte('"')
 247             writeInnerStringJSON(w, keys[i])
 248             w.WriteString(`": null`)
 249         }
 250         w.WriteByte('}')
 251 
 252         w.WriteByte('\n')
 253         if err := w.Flush(); err != nil {
 254             return noMoreOutput{}
 255         }
 256         return nil
 257     })
 258 }
 259 
 260 func emitJSONS(w *bufio.Writer, rr *csv.Reader) error {
 261     got := 0
 262     var keys []string
 263 
 264     err := loopCSV(rr, func(i int, row []string) error {
 265         got++
 266 
 267         if i == 0 {
 268             keys = make([]string, 0, len(row))
 269             for _, s := range row {
 270                 c := string(append([]byte{}, s...))
 271                 keys = append(keys, c)
 272             }
 273             return nil
 274         }
 275 
 276         if i == 1 {
 277             w.WriteByte('[')
 278         } else {
 279             err := w.WriteByte(',')
 280             if err != nil {
 281                 return noMoreOutput{}
 282             }
 283         }
 284 
 285         w.WriteByte('{')
 286         for i, s := range row {
 287             if i > 0 {
 288                 w.WriteByte(',')
 289             }
 290             writeKV(w, keys[i], s)
 291         }
 292 
 293         for i := len(row); i < len(keys); i++ {
 294             if i > 0 {
 295                 w.WriteByte(',')
 296             }
 297             w.WriteByte('"')
 298             writeInnerStringJSON(w, keys[i])
 299             w.WriteString(`":null`)
 300         }
 301         w.WriteByte('}')
 302 
 303         return nil
 304     })
 305 
 306     if err != nil {
 307         return err
 308     }
 309 
 310     if got > 1 {
 311         w.WriteString("]\n")
 312     }
 313     return nil
 314 }
 315 
 316 func emitTSV(w *bufio.Writer, rr *csv.Reader) error {
 317     width := -1
 318 
 319     return loopCSV(rr, func(i int, row []string) error {
 320         if width < 0 {
 321             width = len(row)
 322         }
 323 
 324         for i, s := range row {
 325             if strings.IndexByte(s, '\t') >= 0 {
 326                 const msg = `can't convert CSV whose items have tabs to TSV`
 327                 return errors.New(msg)
 328             }
 329             if i > 0 {
 330                 w.WriteByte('\t')
 331             }
 332             w.WriteString(s)
 333         }
 334 
 335         for i := len(row); i < width; i++ {
 336             w.WriteByte('\t')
 337         }
 338 
 339         w.WriteByte('\n')
 340         if err := w.Flush(); err != nil {
 341             // a write error may be the consequence of stdout being closed,
 342             // perhaps by another app along a pipe
 343             return noMoreOutput{}
 344         }
 345         return nil
 346     })
 347 }
 348 
 349 // writeInnerStringJSON helps JSON-encode strings more quickly
 350 func writeInnerStringJSON(w *bufio.Writer, s string) {
 351     needsEscaping := false
 352     for _, r := range s {
 353         if '#' <= r && r <= '~' && r != '\\' {
 354             continue
 355         }
 356         if r == ' ' || r == '!' || unicode.IsLetter(r) {
 357             continue
 358         }
 359 
 360         needsEscaping = true
 361         break
 362     }
 363 
 364     if !needsEscaping {
 365         w.WriteString(s)
 366         return
 367     }
 368 
 369     outer, err := json.Marshal(s)
 370     if err != nil {
 371         return
 372     }
 373     inner := outer[1 : len(outer)-1]
 374     w.Write(inner)
 375 }
 376 
 377 func writeKV(w *bufio.Writer, k string, s string) {
 378     w.WriteByte('"')
 379     writeInnerStringJSON(w, k)
 380     w.WriteString(`": "`)
 381     writeInnerStringJSON(w, s)
 382     w.WriteByte('"')
 383 }
 384 
 385 func numberLike(s string) bool {
 386     if len(s) == 0 {
 387         return false
 388     }
 389 
 390     if s[0] == '-' {
 391         s = s[1:]
 392     }
 393 
 394     if len(s) == 0 || s[0] < '0' || s[0] > '9' {
 395         return false
 396     }
 397 
 398     for len(s) > 0 {
 399         lead := s[0]
 400         s = s[1:]
 401 
 402         if lead == '.' {
 403             return allDigits(s)
 404         }
 405         if lead < '0' || lead > '9' {
 406             return false
 407         }
 408     }
 409 
 410     return true
 411 }
 412 
 413 func allDigits(s string) bool {
 414     if len(s) == 0 {
 415         return false
 416     }
 417 
 418     for _, r := range s {
 419         if r < '0' || r > '9' {
 420             return false
 421         }
 422     }
 423     return true
 424 }
 425 
 426 func makeRowReader(r io.Reader) *csv.Reader {
 427     rr := csv.NewReader(r)
 428     rr.LazyQuotes = true
 429     rr.ReuseRecord = true
 430     rr.FieldsPerRecord = -1
 431     return rr
 432 }
 433 
 434 func loopCSV(rr *csv.Reader, handle func(i int, row []string) error) error {
 435     width := 0
 436 
 437     for i := 0; true; i++ {
 438         row, err := rr.Read()
 439         if err == io.EOF {
 440             return nil
 441         }
 442 
 443         if err != nil {
 444             return err
 445         }
 446 
 447         if i == 0 {
 448             width = len(row)
 449         }
 450 
 451         if len(row) > width {
 452             return errors.New(`data-row has more items than the header`)
 453         }
 454 
 455         if err := handle(i, row); err != nil {
 456             return err
 457         }
 458     }
 459 
 460     return nil
 461 }