File: decsv.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath decsv.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/csv"
  37     "encoding/json"
  38     "errors"
  39     "io"
  40     "os"
  41     "strings"
  42     "unicode"
  43 )
  44 
// info is the help message: main shows it when asked for help, and also when
// given more than one filepath; its leading line-feed, which is only there to
// make the literal easier to lay out, is skipped by printing info[1:]
const info = `
decsv [options...] [filepath...]


This cmd-line app turns CSV (comma-separated values) data into either TSV
(tab-separated values), JSONS (JSON Strings), or general JSON (JavaScript
Object Notation).

When not given a filepath, the input is read from the standard input.

Options, when given, can either start with a single or a double-dash:

    -h       show this help message
    -help    show this help message

    -json    emit JSON, where numbers are auto-detected
    -jsonl   emit JSON Lines, where numbers are auto-detected
    -jsons   emit JSON Strings, where object values are strings or null
    -tsv     emit TSV (tab-separated values) lines
`
  65 
  66 // noMoreOutput is a custom error-type meant to be deliberately ignored
  67 type noMoreOutput struct{}
  68 
  69 func (nmo noMoreOutput) Error() string {
  70     return `no more output`
  71 }
  72 
  73 // handler is the type all CSV-converter funcs adhere to
  74 type handler func(*bufio.Writer, *csv.Reader) error
  75 
  76 var handlers = map[string]handler{
  77     `-json`:   emitJSON,
  78     `--json`:  emitJSON,
  79     `-jsonl`:  emitJSONL,
  80     `--jsonl`: emitJSONL,
  81     `-jsons`:  emitJSONS,
  82     `--jsons`: emitJSONS,
  83     `-tsv`:    emitTSV,
  84     `--tsv`:   emitTSV,
  85 }
  86 
  87 func main() {
  88     if len(os.Args) > 1 {
  89         switch os.Args[1] {
  90         case `-h`, `--h`, `-help`, `--help`:
  91             os.Stdout.WriteString(info[1:])
  92             return
  93         }
  94     }
  95 
  96     args := os.Args[1:]
  97     emit := emitTSV
  98     if len(args) > 0 {
  99         if v, ok := handlers[args[0]]; ok {
 100             emit = v
 101             args = args[1:]
 102         }
 103     }
 104 
 105     if len(args) > 1 {
 106         os.Stdout.WriteString(info[1:])
 107         os.Exit(1)
 108     }
 109 
 110     path := `-`
 111     if len(args) > 0 {
 112         path = args[0]
 113     }
 114 
 115     err := handleInput(os.Stdout, os.Stdin, path, emit)
 116     if _, ok := err.(noMoreOutput); ok {
 117         return
 118     }
 119 
 120     if err != nil {
 121         os.Stderr.WriteString("\x1b[31m")
 122         os.Stderr.WriteString(err.Error())
 123         os.Stderr.WriteString("\x1b[0m\n")
 124         os.Exit(1)
 125     }
 126 }
 127 
 128 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
 129     bw := bufio.NewWriter(w)
 130     defer bw.Flush()
 131 
 132     if path == `-` {
 133         return handle(bw, makeRowReader(r))
 134     }
 135 
 136     f, err := os.Open(path)
 137     if err != nil {
 138         // on windows, file-not-found error messages may mention `CreateFile`,
 139         // even when trying to open files in read-only mode
 140         return errors.New(`can't open file named ` + path)
 141     }
 142     defer f.Close()
 143 
 144     return handle(bw, makeRowReader(f))
 145 }
 146 
 147 func emitJSON(w *bufio.Writer, rr *csv.Reader) error {
 148     got := 0
 149     var keys []string
 150 
 151     err := loopCSV(rr, func(i int, row []string) error {
 152         got++
 153 
 154         if i == 0 {
 155             keys = make([]string, 0, len(row))
 156             for _, s := range row {
 157                 keys = append(keys, strings.Clone(s))
 158             }
 159             return nil
 160         }
 161 
 162         if i == 1 {
 163             w.WriteByte('[')
 164         } else {
 165             err := w.WriteByte(',')
 166             if err != nil {
 167                 return noMoreOutput{}
 168             }
 169         }
 170 
 171         w.WriteByte('{')
 172         for i, s := range row {
 173             if i > 0 {
 174                 w.WriteByte(',')
 175             }
 176 
 177             if numberLike(s) {
 178                 w.WriteByte('"')
 179                 writeInnerStringJSON(w, keys[i])
 180                 w.WriteString(`":`)
 181                 w.WriteString(s)
 182                 continue
 183             }
 184 
 185             writeKV(w, keys[i], s)
 186         }
 187 
 188         for i := len(row); i < len(keys); i++ {
 189             if i > 0 {
 190                 w.WriteByte(',')
 191             }
 192             w.WriteByte('"')
 193             writeInnerStringJSON(w, keys[i])
 194             w.WriteString(`":null`)
 195         }
 196         w.WriteByte('}')
 197 
 198         return nil
 199     })
 200 
 201     if err != nil {
 202         return err
 203     }
 204 
 205     if got > 1 {
 206         w.WriteString("]\n")
 207     }
 208     return nil
 209 }
 210 
 211 func emitJSONL(w *bufio.Writer, rr *csv.Reader) error {
 212     var keys []string
 213 
 214     return loopCSV(rr, func(i int, row []string) error {
 215         if i == 0 {
 216             keys = make([]string, 0, len(row))
 217             for _, s := range row {
 218                 c := string(append([]byte{}, s...))
 219                 keys = append(keys, c)
 220             }
 221             return nil
 222         }
 223 
 224         w.WriteByte('{')
 225         for i, s := range row {
 226             if i > 0 {
 227                 w.WriteByte(',')
 228                 w.WriteByte(' ')
 229             }
 230 
 231             if numberLike(s) {
 232                 w.WriteByte('"')
 233                 writeInnerStringJSON(w, keys[i])
 234                 w.WriteString(`": `)
 235                 w.WriteString(s)
 236                 continue
 237             }
 238 
 239             writeKV(w, keys[i], s)
 240         }
 241 
 242         for i := len(row); i < len(keys); i++ {
 243             if i > 0 {
 244                 w.WriteByte(',')
 245                 w.WriteByte(' ')
 246             }
 247             w.WriteByte('"')
 248             writeInnerStringJSON(w, keys[i])
 249             w.WriteString(`": null`)
 250         }
 251         w.WriteByte('}')
 252 
 253         w.WriteByte('\n')
 254         if err := w.Flush(); err != nil {
 255             return noMoreOutput{}
 256         }
 257         return nil
 258     })
 259 }
 260 
 261 func emitJSONS(w *bufio.Writer, rr *csv.Reader) error {
 262     got := 0
 263     var keys []string
 264 
 265     err := loopCSV(rr, func(i int, row []string) error {
 266         got++
 267 
 268         if i == 0 {
 269             keys = make([]string, 0, len(row))
 270             for _, s := range row {
 271                 c := string(append([]byte{}, s...))
 272                 keys = append(keys, c)
 273             }
 274             return nil
 275         }
 276 
 277         if i == 1 {
 278             w.WriteByte('[')
 279         } else {
 280             err := w.WriteByte(',')
 281             if err != nil {
 282                 return noMoreOutput{}
 283             }
 284         }
 285 
 286         w.WriteByte('{')
 287         for i, s := range row {
 288             if i > 0 {
 289                 w.WriteByte(',')
 290             }
 291             writeKV(w, keys[i], s)
 292         }
 293 
 294         for i := len(row); i < len(keys); i++ {
 295             if i > 0 {
 296                 w.WriteByte(',')
 297             }
 298             w.WriteByte('"')
 299             writeInnerStringJSON(w, keys[i])
 300             w.WriteString(`":null`)
 301         }
 302         w.WriteByte('}')
 303 
 304         return nil
 305     })
 306 
 307     if err != nil {
 308         return err
 309     }
 310 
 311     if got > 1 {
 312         w.WriteString("]\n")
 313     }
 314     return nil
 315 }
 316 
 317 func emitTSV(w *bufio.Writer, rr *csv.Reader) error {
 318     width := -1
 319 
 320     return loopCSV(rr, func(i int, row []string) error {
 321         if width < 0 {
 322             width = len(row)
 323         }
 324 
 325         for i, s := range row {
 326             if strings.IndexByte(s, '\t') >= 0 {
 327                 const msg = `can't convert CSV whose items have tabs to TSV`
 328                 return errors.New(msg)
 329             }
 330             if i > 0 {
 331                 w.WriteByte('\t')
 332             }
 333             w.WriteString(s)
 334         }
 335 
 336         for i := len(row); i < width; i++ {
 337             w.WriteByte('\t')
 338         }
 339 
 340         w.WriteByte('\n')
 341         if err := w.Flush(); err != nil {
 342             // a write error may be the consequence of stdout being closed,
 343             // perhaps by another app along a pipe
 344             return noMoreOutput{}
 345         }
 346         return nil
 347     })
 348 }
 349 
 350 // writeInnerStringJSON helps JSON-encode strings more quickly
 351 func writeInnerStringJSON(w *bufio.Writer, s string) {
 352     needsEscaping := false
 353     for _, r := range s {
 354         if '#' <= r && r <= '~' && r != '\\' {
 355             continue
 356         }
 357         if r == ' ' || r == '!' || unicode.IsLetter(r) {
 358             continue
 359         }
 360 
 361         needsEscaping = true
 362         break
 363     }
 364 
 365     if !needsEscaping {
 366         w.WriteString(s)
 367         return
 368     }
 369 
 370     outer, err := json.Marshal(s)
 371     if err != nil {
 372         return
 373     }
 374     inner := outer[1 : len(outer)-1]
 375     w.Write(inner)
 376 }
 377 
 378 func writeKV(w *bufio.Writer, k string, s string) {
 379     w.WriteByte('"')
 380     writeInnerStringJSON(w, k)
 381     w.WriteString(`": "`)
 382     writeInnerStringJSON(w, s)
 383     w.WriteByte('"')
 384 }
 385 
 386 func numberLike(s string) bool {
 387     if len(s) == 0 {
 388         return false
 389     }
 390 
 391     if s[0] == '-' {
 392         s = s[1:]
 393     }
 394 
 395     if len(s) == 0 || s[0] < '0' || s[0] > '9' {
 396         return false
 397     }
 398 
 399     for len(s) > 0 {
 400         lead := s[0]
 401         s = s[1:]
 402 
 403         if lead == '.' {
 404             return allDigits(s)
 405         }
 406         if lead < '0' || lead > '9' {
 407             return false
 408         }
 409     }
 410 
 411     return true
 412 }
 413 
 414 func allDigits(s string) bool {
 415     if len(s) == 0 {
 416         return false
 417     }
 418 
 419     for _, r := range s {
 420         if r < '0' || r > '9' {
 421             return false
 422         }
 423     }
 424     return true
 425 }
 426 
 427 func makeRowReader(r io.Reader) *csv.Reader {
 428     rr := csv.NewReader(r)
 429     rr.LazyQuotes = true
 430     rr.ReuseRecord = true
 431     rr.FieldsPerRecord = -1
 432     return rr
 433 }
 434 
 435 func loopCSV(rr *csv.Reader, handle func(i int, row []string) error) error {
 436     width := 0
 437 
 438     for i := 0; true; i++ {
 439         row, err := rr.Read()
 440         if err == io.EOF {
 441             return nil
 442         }
 443 
 444         if err != nil {
 445             return err
 446         }
 447 
 448         if i == 0 {
 449             width = len(row)
 450         }
 451 
 452         if len(row) > width {
 453             return errors.New(`data-row has more items than the header`)
 454         }
 455 
 456         if err := handle(i, row); err != nil {
 457             return err
 458         }
 459     }
 460 
 461     return nil
 462 }