// File: decsv.go

/*
The MIT License (MIT)

Copyright © 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath decsv.go
*/

package main

import (
	"bufio"
	"encoding/csv"
	"encoding/json"
	"errors"
	"io"
	"os"
	"strings"
	"unicode"
)

// info is the help message: its leading line-feed is there only to make the
// source easier to read, and is skipped when the message is shown
const info = `
decsv [options...] [filepath...]


This cmd-line app turns CSV (comma-separated values) data into either TSV
(tab-separated values), JSONS (JSON Strings), or general JSON (JavaScript
Object Notation).

When not given a filepath, the input is read from the standard input.

Options, when given, can either start with a single or a double-dash:

    -h        show this help message
    -help     show this help message

    -json     emit JSON, where numbers are auto-detected
    -jsonl    emit JSON Lines, where numbers are auto-detected
    -jsons    emit JSON Strings, where object values are strings or null
    -tsv      emit TSV (tab-separated values) lines
`

// noMoreOutput is a custom error-type meant to be deliberately ignored
type noMoreOutput struct{}

func (nmo noMoreOutput) Error() string {
	return `no more output`
}

// handler is the type all CSV-converter funcs adhere to
type handler func(*bufio.Writer, *csv.Reader) error

// handlers maps each output-format option to the func which implements it
var handlers = map[string]handler{
	`-json`:   emitJSON,
	`--json`:  emitJSON,
	`-jsonl`:  emitJSONL,
	`--jsonl`: emitJSONL,
	`-jsons`:  emitJSONS,
	`--jsons`: emitJSONS,
	`-tsv`:    emitTSV,
	`--tsv`:   emitTSV,
}

// main handles the cmd-line arguments, which are an optional output-format
// option, followed by an optional filepath: the output format defaults to
// TSV, and the input defaults to the standard input
func main() {
	args := os.Args[1:]

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	var emit handler = emitTSV
	if len(args) > 0 {
		if v, ok := handlers[args[0]]; ok {
			emit = v
			args = args[1:]
		}
	}

	if len(args) > 1 {
		// usage errors go to stderr, so anything reading the output along
		// a pipe never mistakes the help message for converted data
		os.Stderr.WriteString(info[1:])
		os.Exit(1)
	}

	path := `-`
	if len(args) > 0 {
		path = args[0]
	}

	err := handleInput(os.Stdout, os.Stdin, path, emit)
	if err == nil {
		return
	}

	// not being able to write any more output is a quiet way to quit
	var stop noMoreOutput
	if errors.As(err, &stop) {
		return
	}

	os.Stderr.WriteString(err.Error())
	os.Stderr.WriteString("\n")
	os.Exit(1)
}

// handleInput converts the CSV data from the file whose path is given, or
// from the reader given when the path is a single dash, sending the result
// of the handler given to the writer given, via a buffer; the buffer is
// always flushed, even when the handler fails, so valid output emitted
// before an error isn't lost
func handleInput(w io.Writer, r io.Reader, path string, handle handler) error {
	if path != `-` {
		f, err := os.Open(path)
		if err != nil {
			// on windows, file-not-found error messages may mention `CreateFile`,
			// even when trying to open files in read-only mode
			return errors.New(`can't open file named ` + path)
		}
		defer f.Close()
		r = f
	}

	bw := bufio.NewWriter(w)
	err := handle(bw, makeRowReader(r))
	if ferr := bw.Flush(); ferr != nil && err == nil {
		// a write error may be the consequence of stdout being closed,
		// perhaps by another app along a pipe
		return noMoreOutput{}
	}
	return err
}

// emitJSON turns CSV rows into a JSON array of objects, whose keys come from
// the header row, and whose number-like values are emitted as numbers
func emitJSON(w *bufio.Writer, rr *csv.Reader) error {
	return emitArray(w, rr, true)
}

// emitJSONL turns CSV rows into JSON Lines, which means one object per line:
// keys come from the header row, and number-like values become numbers
func emitJSONL(w *bufio.Writer, rr *csv.Reader) error {
	var keys []string

	return loopCSV(rr, func(i int, row []string) error {
		if i == 0 {
			keys = cloneKeys(row)
			return nil
		}

		writeObject(w, keys, row, `, `, `: `, true)
		w.WriteByte('\n')

		// flush each line right away, both to keep output flowing along
		// pipes, and to find out early when no one is reading it anymore
		if err := w.Flush(); err != nil {
			return noMoreOutput{}
		}
		return nil
	})
}

// emitJSONS turns CSV rows into a JSON array of objects, whose keys come from
// the header row, and whose values are always either strings or null
func emitJSONS(w *bufio.Writer, rr *csv.Reader) error {
	return emitArray(w, rr, false)
}

// emitArray is the logic shared by funcs emitJSON and emitJSONS: nothing at
// all is emitted when the input has no data rows after its header
func emitArray(w *bufio.Writer, rr *csv.Reader, numbers bool) error {
	rows := 0
	var keys []string

	err := loopCSV(rr, func(i int, row []string) error {
		if i == 0 {
			keys = cloneKeys(row)
			return nil
		}

		if rows == 0 {
			w.WriteByte('[')
		} else if err := w.WriteByte(','); err != nil {
			return noMoreOutput{}
		}
		rows++

		writeObject(w, keys, row, `,`, `:`, numbers)
		return nil
	})

	if err != nil {
		return err
	}

	if rows > 0 {
		w.WriteString("]\n")
	}
	return nil
}

// cloneKeys copies the header row given, since the CSV reader is set up to
// reuse its record slice (see func makeRowReader), which means later rows
// would overwrite the header otherwise
func cloneKeys(row []string) []string {
	keys := make([]string, 0, len(row))
	for _, s := range row {
		keys = append(keys, strings.Clone(s))
	}
	return keys
}

// writeObject emits a data row as a JSON object: sep goes between key-value
// pairs, colon goes between keys and their unquoted (number/null) values, and
// keys the row has no items for get null values; when numbers is true, items
// which look like valid JSON numbers are emitted unquoted; rows are assumed
// to never have more items than keys, which func loopCSV ensures
func writeObject(w *bufio.Writer, keys, row []string, sep, colon string, numbers bool) {
	w.WriteByte('{')

	for i, k := range keys {
		if i > 0 {
			w.WriteString(sep)
		}

		switch {
		case i >= len(row):
			w.WriteByte('"')
			writeInnerStringJSON(w, k)
			w.WriteByte('"')
			w.WriteString(colon)
			w.WriteString(`null`)

		case numbers && numberLike(row[i]):
			w.WriteByte('"')
			writeInnerStringJSON(w, k)
			w.WriteByte('"')
			w.WriteString(colon)
			w.WriteString(row[i])

		default:
			writeKV(w, k, row[i])
		}
	}

	w.WriteByte('}')
}

// emitTSV turns CSV rows into TSV lines, padding rows shorter than the first
// one with empty items; items with tabs or line-breaks in them are errors,
// since TSV has no way to escape those
func emitTSV(w *bufio.Writer, rr *csv.Reader) error {
	width := -1

	return loopCSV(rr, func(i int, row []string) error {
		if width < 0 {
			width = len(row)
		}

		// check the whole row before emitting any of it, so that bad rows
		// never leave partial lines in the output
		for _, s := range row {
			if strings.IndexByte(s, '\t') >= 0 {
				const msg = `can't convert CSV whose items have tabs to TSV`
				return errors.New(msg)
			}
			if strings.ContainsAny(s, "\r\n") {
				const msg = `can't convert CSV whose items have line-breaks to TSV`
				return errors.New(msg)
			}
		}

		for j, s := range row {
			if j > 0 {
				w.WriteByte('\t')
			}
			w.WriteString(s)
		}

		for j := len(row); j < width; j++ {
			w.WriteByte('\t')
		}

		w.WriteByte('\n')
		if err := w.Flush(); err != nil {
			// a write error may be the consequence of stdout being closed,
			// perhaps by another app along a pipe
			return noMoreOutput{}
		}
		return nil
	})
}

// writeInnerStringJSON helps JSON-encode strings more quickly: it emits the
// string given without its surrounding double-quotes, copying it as is when
// it only has runes known not to need escaping, and relying on the stdlib
// JSON encoder otherwise; the latter also escapes symbols like `<`, which
// the quick path emits verbatim: both ways result in valid JSON
func writeInnerStringJSON(w *bufio.Writer, s string) {
	if !needsEscaping(s) {
		w.WriteString(s)
		return
	}

	outer, err := json.Marshal(s)
	if err != nil {
		return
	}

	// skip the surrounding double-quotes
	inner := outer[1 : len(outer)-1]
	w.Write(inner)
}

// needsEscaping checks if a string has any runes outside the ones known to
// be safe to emit verbatim inside JSON strings, which are letters, spaces,
// exclamation marks, and ASCII from `#` to `~`, except for backslashes
func needsEscaping(s string) bool {
	for _, r := range s {
		if '#' <= r && r <= '~' && r != '\\' {
			continue
		}
		if r == ' ' || r == '!' || unicode.IsLetter(r) {
			continue
		}
		return true
	}
	return false
}

// writeKV emits a JSON key-value pair, whose value is a string
func writeKV(w *bufio.Writer, k string, s string) {
	w.WriteByte('"')
	writeInnerStringJSON(w, k)
	w.WriteString(`": "`)
	writeInnerStringJSON(w, s)
	w.WriteByte('"')
}

// numberLike checks if a string can be emitted verbatim as a JSON number:
// that means an optional leading minus, followed by digits, optionally
// followed by a dot and more digits; integer parts with extra leading
// zeros, such as `007`, are rejected, since JSON doesn't allow them
func numberLike(s string) bool {
	s = strings.TrimPrefix(s, `-`)
	if len(s) == 0 || s[0] < '0' || s[0] > '9' {
		return false
	}

	// a leading zero must be the whole integer part
	if s[0] == '0' && len(s) > 1 && s[1] != '.' {
		return false
	}

	whole, frac, dotted := strings.Cut(s, `.`)
	if !allDigits(whole) {
		return false
	}
	return !dotted || allDigits(frac)
}

// allDigits checks if a non-empty string only has ASCII decimal digits in it
func allDigits(s string) bool {
	if len(s) == 0 {
		return false
	}

	for _, r := range s {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}

// makeRowReader makes a lenient CSV reader: it tolerates stray double-quotes,
// allows rows to differ in their item-counts, and reuses the slice for its
// rows, which means each row is only valid until the next one is read
func makeRowReader(r io.Reader) *csv.Reader {
	rr := csv.NewReader(r)
	rr.LazyQuotes = true
	rr.ReuseRecord = true
	rr.FieldsPerRecord = -1
	return rr
}

// loopCSV calls the func given for each row read, along with its 0-based
// index, until the input ends, or until the first error, which is returned;
// rows with more items than the first row, the header, are errors
func loopCSV(rr *csv.Reader, handle func(i int, row []string) error) error {
	width := 0

	for i := 0; ; i++ {
		row, err := rr.Read()
		if errors.Is(err, io.EOF) {
			return nil
		}

		if err != nil {
			return err
		}

		if i == 0 {
			width = len(row)
		}

		if len(row) > width {
			return errors.New(`data-row has more items than the header`)
		}

		if err := handle(i, row); err != nil {
			return err
		}
	}
}