/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for decsv.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath decsv.go
*/

package main

import (
	"bufio"
	"encoding/csv"
	"encoding/json"
	"errors"
	"io"
	"os"
	"strings"
	"unicode"
)

// info is the help message; its leading line-feed is skipped when shown
const info = `
decsv [options...] [filepath...]

This cmd-line app turns CSV (comma-separated values) data into either TSV
(tab-separated values), JSONS (JSON Strings), or general JSON (JavaScript
Object Notation).

When not given a filepath, the input is read from the standard input.

Options, when given, can either start with a single or a double-dash:

    -h        show this help message
    -help     show this help message
    -json     emit JSON, where numbers are auto-detected
    -jsonl    emit JSON Lines, where numbers are auto-detected
    -jsons    emit JSON Strings, where object values are strings or null
    -tsv      emit TSV (tab-separated values) lines
`

// noMoreOutput is a custom error-type meant to be deliberately ignored
type noMoreOutput struct{}

// Error implements the error interface
func (nmo noMoreOutput) Error() string {
	return `no more output`
}

// handler is the type all CSV-converter funcs adhere to
type handler func(*bufio.Writer, *csv.Reader) error

// handlers maps each output-format option, in both its single-dash and
// double-dash spellings, to the func which emits that format
var handlers = map[string]handler{
	`-json`:   emitJSON,
	`--json`:  emitJSON,
	`-jsonl`:  emitJSONL,
	`--jsonl`: emitJSONL,
	`-jsons`:  emitJSONS,
	`--jsons`: emitJSONS,
	`-tsv`:    emitTSV,
	`--tsv`:   emitTSV,
}

// main handles the cmd-line arguments: an optional leading output-format
// option (TSV being the default), followed by at most 1 filepath
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	args := os.Args[1:]
	emit := emitTSV
	if len(args) > 0 {
		if v, ok := handlers[args[0]]; ok {
			emit = v
			args = args[1:]
		}
	}

	if len(args) > 1 {
		os.Stdout.WriteString(info[1:])
		os.Exit(1)
	}

	path := `-`
	if len(args) > 0 {
		path = args[0]
	}

	err := handleInput(os.Stdout, os.Stdin, path, emit)

	// running out of output isn't a failure worth reporting
	var nmo noMoreOutput
	if errors.As(err, &nmo) {
		return
	}

	if err != nil {
		os.Stderr.WriteString("\x1b[31m")
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\x1b[0m\n")
		os.Exit(1)
	}
}

// handleInput converts CSV data using the handler given, writing results
// to w: the data come from r when the path given is a single dash, and from
// the file named by the path otherwise. Buffered output is flushed on return,
// even when the handler fails.
func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	if path == `-` {
		return handle(bw, makeRowReader(r))
	}

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()

	return handle(bw, makeRowReader(f))
}

// emitJSON turns CSV rows into a JSON array of objects, using the first row
// for the keys: number-like values are emitted as numbers, other values as
// strings, and values missing from short rows as nulls
func emitJSON(w *bufio.Writer, rr *csv.Reader) error {
	return emitArray(w, rr, true)
}

// emitJSONL turns CSV rows into JSON Lines, using the first row for the
// keys: each later row becomes an object on its own line, where number-like
// values are emitted as numbers, and values missing from short rows as nulls
func emitJSONL(w *bufio.Writer, rr *csv.Reader) error {
	var keys []string

	return loopCSV(rr, func(i int, row []string) error {
		if i == 0 {
			keys = cloneRow(row)
			return nil
		}

		writeObject(w, keys, row, true, true)
		w.WriteByte('\n')

		if err := w.Flush(); err != nil {
			// a write error may be the consequence of stdout being closed,
			// perhaps by another app along a pipe
			return noMoreOutput{}
		}
		return nil
	})
}

// emitJSONS turns CSV rows into a JSON array of objects, using the first row
// for the keys: all values are emitted as strings, except for the ones
// missing from short rows, which are emitted as nulls
func emitJSONS(w *bufio.Writer, rr *csv.Reader) error {
	return emitArray(w, rr, false)
}

// emitArray is the logic shared by emitJSON and emitJSONS: param numbers
// controls whether number-like values are emitted as JSON numbers
func emitArray(w *bufio.Writer, rr *csv.Reader, numbers bool) error {
	rows := 0
	var keys []string

	err := loopCSV(rr, func(i int, row []string) error {
		if i == 0 {
			keys = cloneRow(row)
			return nil
		}

		// the first object opens the array, later ones follow a comma
		lead := byte(',')
		if rows == 0 {
			lead = '['
		}

		// errors from buffered writers are `sticky`, so checking once per
		// row is enough to notice when the output can't take any more
		if err := w.WriteByte(lead); err != nil {
			return noMoreOutput{}
		}

		rows++
		writeObject(w, keys, row, false, numbers)
		return nil
	})

	if err != nil {
		return err
	}

	if rows == 0 {
		// emitting nothing wouldn't be valid JSON, unlike an empty array
		w.WriteString("[]\n")
		return nil
	}

	w.WriteString("]\n")
	return nil
}

// cloneRow copies a row and its strings: this matters because the CSV
// readers used are configured to reuse the same slice across rows
func cloneRow(row []string) []string {
	keys := make([]string, 0, len(row))
	for _, s := range row {
		keys = append(keys, strings.Clone(s))
	}
	return keys
}

// writeObject emits a JSON object with a key-value pair for each key given:
// keys beyond the end of the row are paired to nulls. Param spaced adds a
// space after the commas between pairs, and after the colons which precede
// numbers and nulls; param numbers enables emitting number-like values as
// JSON numbers, instead of as strings.
func writeObject(w *bufio.Writer, keys, row []string, spaced, numbers bool) {
	sep := `,`
	if spaced {
		sep = `, `
	}

	w.WriteByte('{')
	for i, k := range keys {
		if i > 0 {
			w.WriteString(sep)
		}

		switch {
		case i >= len(row):
			writeNullKV(w, k, spaced, `null`)
		case numbers && numberLike(row[i]):
			writeNullKV(w, k, spaced, row[i])
		default:
			writeKV(w, k, row[i])
		}
	}
	w.WriteByte('}')
}

// writeNullKV emits a JSON key followed by an unquoted value, such as a
// null or a number: param spaced adds a space after the colon
func writeNullKV(w *bufio.Writer, k string, spaced bool, raw string) {
	w.WriteByte('"')
	writeInnerStringJSON(w, k)
	if spaced {
		w.WriteString(`": `)
	} else {
		w.WriteString(`":`)
	}
	w.WriteString(raw)
}

// emitTSV turns CSV rows into TSV lines, padding rows shorter than the first
// one with trailing tabs: rows with items which TSV can't represent, which
// are those with tabs or line-feeds in them, cause an error
func emitTSV(w *bufio.Writer, rr *csv.Reader) error {
	width := -1

	return loopCSV(rr, func(i int, row []string) error {
		if width < 0 {
			width = len(row)
		}

		// check the whole row before emitting any of it, to avoid leaving
		// an incomplete line in the output when failing
		for _, s := range row {
			if strings.IndexByte(s, '\t') >= 0 {
				const msg = `can't convert CSV whose items have tabs to TSV`
				return errors.New(msg)
			}
			if strings.IndexByte(s, '\n') >= 0 {
				const msg = `can't convert CSV whose items have line-feeds to TSV`
				return errors.New(msg)
			}
		}

		for j, s := range row {
			if j > 0 {
				w.WriteByte('\t')
			}
			w.WriteString(s)
		}

		for j := len(row); j < width; j++ {
			w.WriteByte('\t')
		}
		w.WriteByte('\n')

		if err := w.Flush(); err != nil {
			// a write error may be the consequence of stdout being closed,
			// perhaps by another app along a pipe
			return noMoreOutput{}
		}
		return nil
	})
}

// writeInnerStringJSON helps JSON-encode strings more quickly: it emits the
// string's content without the surrounding double-quotes
func writeInnerStringJSON(w *bufio.Writer, s string) {
	needsEscaping := false
	for _, r := range s {
		// ASCII symbols from # to ~ are safe as they are, except backslashes
		if '#' <= r && r <= '~' && r != '\\' {
			continue
		}
		if r == ' ' || r == '!' || unicode.IsLetter(r) {
			continue
		}
		needsEscaping = true
		break
	}

	if !needsEscaping {
		w.WriteString(s)
		return
	}

	// let the standard library handle all the escaping
	outer, err := json.Marshal(s)
	if err != nil {
		return
	}

	// ignore the surrounding double-quotes
	inner := outer[1 : len(outer)-1]
	w.Write(inner)
}

// writeKV emits a JSON key-value pair, where the value is a string
func writeKV(w *bufio.Writer, k string, s string) {
	w.WriteByte('"')
	writeInnerStringJSON(w, k)
	w.WriteString(`": "`)
	writeInnerStringJSON(w, s)
	w.WriteByte('"')
}

// numberLike checks if a string can be emitted verbatim as a JSON number:
// it accepts an optional leading minus, followed by decimal digits, which an
// optional dot and more digits can follow. Leading zeros in the integer part
// are rejected, since JSON doesn't allow them.
func numberLike(s string) bool {
	s = strings.TrimPrefix(s, `-`)
	if len(s) == 0 || s[0] < '0' || s[0] > '9' {
		return false
	}

	// a leading zero can only be the whole integer part
	if s[0] == '0' && len(s) > 1 && s[1] != '.' {
		return false
	}

	whole, frac, dotted := strings.Cut(s, `.`)
	if !allDigits(whole) {
		return false
	}
	return !dotted || allDigits(frac)
}

// allDigits checks if a string is non-empty and only has decimal digits
func allDigits(s string) bool {
	if len(s) == 0 {
		return false
	}

	for _, r := range s {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}

// makeRowReader makes a lenient CSV reader, which tolerates stray quotes,
// allows rows to have different item-counts, and reuses the slice it returns
// across rows
func makeRowReader(r io.Reader) *csv.Reader {
	rr := csv.NewReader(r)
	rr.LazyQuotes = true
	rr.ReuseRecord = true
	rr.FieldsPerRecord = -1
	return rr
}

// loopCSV calls the func given for each row read, along with its 0-based
// index, until either the data end, reading fails, or the func fails: rows
// with more items than the first row also cause an error
func loopCSV(rr *csv.Reader, handle func(i int, row []string) error) error {
	width := 0

	for i := 0; ; i++ {
		row, err := rr.Read()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		if i == 0 {
			width = len(row)
		}
		if len(row) > width {
			return errors.New(`data-row has more items than the header`)
		}

		if err := handle(i, row); err != nil {
			return err
		}
	}
}