File: decsv.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for decsv.
  27 
  28 To compile a smaller-sized command-line app, you can use the `go` command as
  29 follows:
  30 
  31 go build -ldflags "-s -w" -trimpath decsv.go
  32 */
  33 
  34 package main
  35 
  36 import (
  37     "bufio"
  38     "encoding/csv"
  39     "encoding/json"
  40     "errors"
  41     "io"
  42     "os"
  43     "strings"
  44     "unicode"
  45 )
  46 
// info is the help message for this app: it starts with a line-feed to keep
// the raw string tidy in the source, so main skips that by showing info[1:]
const info = `
decsv [options...] [filepath...]


This cmd-line app turns CSV (comma-separated values) data into either TSV
(tab-separated values), JSONS (JSON Strings), or general JSON (JavaScript
Object Notation).

When not given a filepath, the input is read from the standard input.

Options, when given, can either start with a single or a double-dash:

    -h       show this help message
    -help    show this help message

    -json    emit JSON, where numbers are auto-detected
    -jsonl   emit JSON Lines, where numbers are auto-detected
    -jsons   emit JSON Strings, where object values are strings or null
    -tsv     emit TSV (tab-separated values) lines
`
  67 
  68 // noMoreOutput is a custom error-type meant to be deliberately ignored
  69 type noMoreOutput struct{}
  70 
  71 func (nmo noMoreOutput) Error() string {
  72     return `no more output`
  73 }
  74 
// handler is the type all CSV-converter funcs adhere to: each is given the
// buffered writer to emit its output to and the CSV reader to get rows from,
// and returns the error which stopped the conversion, if any
type handler func(*bufio.Writer, *csv.Reader) error
  77 
// handlers maps each output-format option, in both its single-dash and its
// double-dash forms, to the converter func implementing it; main looks up
// its first cmd-line argument in this table
var handlers = map[string]handler{
    `-json`:   emitJSON,
    `--json`:  emitJSON,
    `-jsonl`:  emitJSONL,
    `--jsonl`: emitJSONL,
    `-jsons`:  emitJSONS,
    `--jsons`: emitJSONS,
    `-tsv`:    emitTSV,
    `--tsv`:   emitTSV,
}
  88 
  89 func main() {
  90     if len(os.Args) > 1 {
  91         switch os.Args[1] {
  92         case `-h`, `--h`, `-help`, `--help`:
  93             os.Stdout.WriteString(info[1:])
  94             return
  95         }
  96     }
  97 
  98     args := os.Args[1:]
  99     emit := emitTSV
 100     if len(args) > 0 {
 101         if v, ok := handlers[args[0]]; ok {
 102             emit = v
 103             args = args[1:]
 104         }
 105     }
 106 
 107     if len(args) > 1 {
 108         os.Stdout.WriteString(info[1:])
 109         os.Exit(1)
 110     }
 111 
 112     path := `-`
 113     if len(args) > 0 {
 114         path = args[0]
 115     }
 116 
 117     err := handleInput(os.Stdout, os.Stdin, path, emit)
 118     if _, ok := err.(noMoreOutput); ok {
 119         return
 120     }
 121 
 122     if err != nil {
 123         os.Stderr.WriteString("\x1b[31m")
 124         os.Stderr.WriteString(err.Error())
 125         os.Stderr.WriteString("\x1b[0m\n")
 126         os.Exit(1)
 127     }
 128 }
 129 
 130 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
 131     bw := bufio.NewWriter(w)
 132     defer bw.Flush()
 133 
 134     if path == `-` {
 135         return handle(bw, makeRowReader(r))
 136     }
 137 
 138     f, err := os.Open(path)
 139     if err != nil {
 140         // on windows, file-not-found error messages may mention `CreateFile`,
 141         // even when trying to open files in read-only mode
 142         return errors.New(`can't open file named ` + path)
 143     }
 144     defer f.Close()
 145 
 146     return handle(bw, makeRowReader(f))
 147 }
 148 
 149 func emitJSON(w *bufio.Writer, rr *csv.Reader) error {
 150     got := 0
 151     var keys []string
 152 
 153     err := loopCSV(rr, func(i int, row []string) error {
 154         got++
 155 
 156         if i == 0 {
 157             keys = make([]string, 0, len(row))
 158             for _, s := range row {
 159                 keys = append(keys, strings.Clone(s))
 160             }
 161             return nil
 162         }
 163 
 164         if i == 1 {
 165             w.WriteByte('[')
 166         } else {
 167             err := w.WriteByte(',')
 168             if err != nil {
 169                 return noMoreOutput{}
 170             }
 171         }
 172 
 173         w.WriteByte('{')
 174         for i, s := range row {
 175             if i > 0 {
 176                 w.WriteByte(',')
 177             }
 178 
 179             if numberLike(s) {
 180                 w.WriteByte('"')
 181                 writeInnerStringJSON(w, keys[i])
 182                 w.WriteString(`":`)
 183                 w.WriteString(s)
 184                 continue
 185             }
 186 
 187             writeKV(w, keys[i], s)
 188         }
 189 
 190         for i := len(row); i < len(keys); i++ {
 191             if i > 0 {
 192                 w.WriteByte(',')
 193             }
 194             w.WriteByte('"')
 195             writeInnerStringJSON(w, keys[i])
 196             w.WriteString(`":null`)
 197         }
 198         w.WriteByte('}')
 199 
 200         return nil
 201     })
 202 
 203     if err != nil {
 204         return err
 205     }
 206 
 207     if got > 1 {
 208         w.WriteString("]\n")
 209     }
 210     return nil
 211 }
 212 
 213 func emitJSONL(w *bufio.Writer, rr *csv.Reader) error {
 214     var keys []string
 215 
 216     return loopCSV(rr, func(i int, row []string) error {
 217         if i == 0 {
 218             keys = make([]string, 0, len(row))
 219             for _, s := range row {
 220                 c := string(append([]byte{}, s...))
 221                 keys = append(keys, c)
 222             }
 223             return nil
 224         }
 225 
 226         w.WriteByte('{')
 227         for i, s := range row {
 228             if i > 0 {
 229                 w.WriteByte(',')
 230                 w.WriteByte(' ')
 231             }
 232 
 233             if numberLike(s) {
 234                 w.WriteByte('"')
 235                 writeInnerStringJSON(w, keys[i])
 236                 w.WriteString(`": `)
 237                 w.WriteString(s)
 238                 continue
 239             }
 240 
 241             writeKV(w, keys[i], s)
 242         }
 243 
 244         for i := len(row); i < len(keys); i++ {
 245             if i > 0 {
 246                 w.WriteByte(',')
 247                 w.WriteByte(' ')
 248             }
 249             w.WriteByte('"')
 250             writeInnerStringJSON(w, keys[i])
 251             w.WriteString(`": null`)
 252         }
 253         w.WriteByte('}')
 254 
 255         w.WriteByte('\n')
 256         if err := w.Flush(); err != nil {
 257             return noMoreOutput{}
 258         }
 259         return nil
 260     })
 261 }
 262 
 263 func emitJSONS(w *bufio.Writer, rr *csv.Reader) error {
 264     got := 0
 265     var keys []string
 266 
 267     err := loopCSV(rr, func(i int, row []string) error {
 268         got++
 269 
 270         if i == 0 {
 271             keys = make([]string, 0, len(row))
 272             for _, s := range row {
 273                 c := string(append([]byte{}, s...))
 274                 keys = append(keys, c)
 275             }
 276             return nil
 277         }
 278 
 279         if i == 1 {
 280             w.WriteByte('[')
 281         } else {
 282             err := w.WriteByte(',')
 283             if err != nil {
 284                 return noMoreOutput{}
 285             }
 286         }
 287 
 288         w.WriteByte('{')
 289         for i, s := range row {
 290             if i > 0 {
 291                 w.WriteByte(',')
 292             }
 293             writeKV(w, keys[i], s)
 294         }
 295 
 296         for i := len(row); i < len(keys); i++ {
 297             if i > 0 {
 298                 w.WriteByte(',')
 299             }
 300             w.WriteByte('"')
 301             writeInnerStringJSON(w, keys[i])
 302             w.WriteString(`":null`)
 303         }
 304         w.WriteByte('}')
 305 
 306         return nil
 307     })
 308 
 309     if err != nil {
 310         return err
 311     }
 312 
 313     if got > 1 {
 314         w.WriteString("]\n")
 315     }
 316     return nil
 317 }
 318 
 319 func emitTSV(w *bufio.Writer, rr *csv.Reader) error {
 320     width := -1
 321 
 322     return loopCSV(rr, func(i int, row []string) error {
 323         if width < 0 {
 324             width = len(row)
 325         }
 326 
 327         for i, s := range row {
 328             if strings.IndexByte(s, '\t') >= 0 {
 329                 const msg = `can't convert CSV whose items have tabs to TSV`
 330                 return errors.New(msg)
 331             }
 332             if i > 0 {
 333                 w.WriteByte('\t')
 334             }
 335             w.WriteString(s)
 336         }
 337 
 338         for i := len(row); i < width; i++ {
 339             w.WriteByte('\t')
 340         }
 341 
 342         w.WriteByte('\n')
 343         if err := w.Flush(); err != nil {
 344             // a write error may be the consequence of stdout being closed,
 345             // perhaps by another app along a pipe
 346             return noMoreOutput{}
 347         }
 348         return nil
 349     })
 350 }
 351 
 352 // writeInnerStringJSON helps JSON-encode strings more quickly
 353 func writeInnerStringJSON(w *bufio.Writer, s string) {
 354     needsEscaping := false
 355     for _, r := range s {
 356         if '#' <= r && r <= '~' && r != '\\' {
 357             continue
 358         }
 359         if r == ' ' || r == '!' || unicode.IsLetter(r) {
 360             continue
 361         }
 362 
 363         needsEscaping = true
 364         break
 365     }
 366 
 367     if !needsEscaping {
 368         w.WriteString(s)
 369         return
 370     }
 371 
 372     outer, err := json.Marshal(s)
 373     if err != nil {
 374         return
 375     }
 376     inner := outer[1 : len(outer)-1]
 377     w.Write(inner)
 378 }
 379 
 380 func writeKV(w *bufio.Writer, k string, s string) {
 381     w.WriteByte('"')
 382     writeInnerStringJSON(w, k)
 383     w.WriteString(`": "`)
 384     writeInnerStringJSON(w, s)
 385     w.WriteByte('"')
 386 }
 387 
 388 func numberLike(s string) bool {
 389     if len(s) == 0 {
 390         return false
 391     }
 392 
 393     if s[0] == '-' {
 394         s = s[1:]
 395     }
 396 
 397     if len(s) == 0 || s[0] < '0' || s[0] > '9' {
 398         return false
 399     }
 400 
 401     for len(s) > 0 {
 402         lead := s[0]
 403         s = s[1:]
 404 
 405         if lead == '.' {
 406             return allDigits(s)
 407         }
 408         if lead < '0' || lead > '9' {
 409             return false
 410         }
 411     }
 412 
 413     return true
 414 }
 415 
 416 func allDigits(s string) bool {
 417     if len(s) == 0 {
 418         return false
 419     }
 420 
 421     for _, r := range s {
 422         if r < '0' || r > '9' {
 423             return false
 424         }
 425     }
 426     return true
 427 }
 428 
 429 func makeRowReader(r io.Reader) *csv.Reader {
 430     rr := csv.NewReader(r)
 431     rr.LazyQuotes = true
 432     rr.ReuseRecord = true
 433     rr.FieldsPerRecord = -1
 434     return rr
 435 }
 436 
 437 func loopCSV(rr *csv.Reader, handle func(i int, row []string) error) error {
 438     width := 0
 439 
 440     for i := 0; true; i++ {
 441         row, err := rr.Read()
 442         if err == io.EOF {
 443             return nil
 444         }
 445 
 446         if err != nil {
 447             return err
 448         }
 449 
 450         if i == 0 {
 451             width = len(row)
 452         }
 453 
 454         if len(row) > width {
 455             return errors.New(`data-row has more items than the header`)
 456         }
 457 
 458         if err := handle(i, row); err != nil {
 459             return err
 460         }
 461     }
 462 
 463     return nil
 464 }