File: decsv.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 Single-file source-code for decsv. 27 28 To compile a smaller-sized command-line app, you can use the `go` command as 29 follows: 30 31 go build -ldflags "-s -w" -trimpath decsv.go 32 */ 33 34 package main 35 36 import ( 37 "bufio" 38 "encoding/csv" 39 "encoding/json" 40 "errors" 41 "io" 42 "os" 43 "strings" 44 "unicode" 45 ) 46 47 const info = ` 48 decsv [options...] [filepath...] 49 50 51 This cmd-line app turns CSV (comma-separated values) data into either TSV 52 (tab-separated values), JSONS (JSON Strings), or general JSON (JavaScript 53 Object Notation). 54 55 When not given a filepath, the input is read from the standard input. 
56 57 Options, when given, can either start with a single or a double-dash: 58 59 -h show this help message 60 -help show this help message 61 62 -json emit JSON, where numbers are auto-detected 63 -jsonl emit JSON Lines, where numbers are auto-detected 64 -jsons emit JSON Strings, where object values are strings or null 65 -tsv emit TSV (tab-separated values) lines 66 ` 67 68 // noMoreOutput is a custom error-type meant to be deliberately ignored 69 type noMoreOutput struct{} 70 71 func (nmo noMoreOutput) Error() string { 72 return `no more output` 73 } 74 75 // handler is the type all CSV-converter funcs adhere to 76 type handler func(*bufio.Writer, *csv.Reader) error 77 78 var handlers = map[string]handler{ 79 `-json`: emitJSON, 80 `--json`: emitJSON, 81 `-jsonl`: emitJSONL, 82 `--jsonl`: emitJSONL, 83 `-jsons`: emitJSONS, 84 `--jsons`: emitJSONS, 85 `-tsv`: emitTSV, 86 `--tsv`: emitTSV, 87 } 88 89 func main() { 90 if len(os.Args) > 1 { 91 switch os.Args[1] { 92 case `-h`, `--h`, `-help`, `--help`: 93 os.Stdout.WriteString(info[1:]) 94 return 95 } 96 } 97 98 args := os.Args[1:] 99 emit := emitTSV 100 if len(args) > 0 { 101 if v, ok := handlers[args[0]]; ok { 102 emit = v 103 args = args[1:] 104 } 105 } 106 107 if len(args) > 1 { 108 os.Stdout.WriteString(info[1:]) 109 os.Exit(1) 110 } 111 112 path := `-` 113 if len(args) > 0 { 114 path = args[0] 115 } 116 117 err := handleInput(os.Stdout, os.Stdin, path, emit) 118 if _, ok := err.(noMoreOutput); ok { 119 return 120 } 121 122 if err != nil { 123 os.Stderr.WriteString("\x1b[31m") 124 os.Stderr.WriteString(err.Error()) 125 os.Stderr.WriteString("\x1b[0m\n") 126 os.Exit(1) 127 } 128 } 129 130 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error { 131 bw := bufio.NewWriter(w) 132 defer bw.Flush() 133 134 if path == `-` { 135 return handle(bw, makeRowReader(r)) 136 } 137 138 f, err := os.Open(path) 139 if err != nil { 140 // on windows, file-not-found error messages may mention `CreateFile`, 
141 // even when trying to open files in read-only mode 142 return errors.New(`can't open file named ` + path) 143 } 144 defer f.Close() 145 146 return handle(bw, makeRowReader(f)) 147 } 148 149 func emitJSON(w *bufio.Writer, rr *csv.Reader) error { 150 got := 0 151 var keys []string 152 153 err := loopCSV(rr, func(i int, row []string) error { 154 got++ 155 156 if i == 0 { 157 keys = make([]string, 0, len(row)) 158 for _, s := range row { 159 keys = append(keys, strings.Clone(s)) 160 } 161 return nil 162 } 163 164 if i == 1 { 165 w.WriteByte('[') 166 } else { 167 err := w.WriteByte(',') 168 if err != nil { 169 return noMoreOutput{} 170 } 171 } 172 173 w.WriteByte('{') 174 for i, s := range row { 175 if i > 0 { 176 w.WriteByte(',') 177 } 178 179 if numberLike(s) { 180 w.WriteByte('"') 181 writeInnerStringJSON(w, keys[i]) 182 w.WriteString(`":`) 183 w.WriteString(s) 184 continue 185 } 186 187 writeKV(w, keys[i], s) 188 } 189 190 for i := len(row); i < len(keys); i++ { 191 if i > 0 { 192 w.WriteByte(',') 193 } 194 w.WriteByte('"') 195 writeInnerStringJSON(w, keys[i]) 196 w.WriteString(`":null`) 197 } 198 w.WriteByte('}') 199 200 return nil 201 }) 202 203 if err != nil { 204 return err 205 } 206 207 if got > 1 { 208 w.WriteString("]\n") 209 } 210 return nil 211 } 212 213 func emitJSONL(w *bufio.Writer, rr *csv.Reader) error { 214 var keys []string 215 216 return loopCSV(rr, func(i int, row []string) error { 217 if i == 0 { 218 keys = make([]string, 0, len(row)) 219 for _, s := range row { 220 c := string(append([]byte{}, s...)) 221 keys = append(keys, c) 222 } 223 return nil 224 } 225 226 w.WriteByte('{') 227 for i, s := range row { 228 if i > 0 { 229 w.WriteByte(',') 230 w.WriteByte(' ') 231 } 232 233 if numberLike(s) { 234 w.WriteByte('"') 235 writeInnerStringJSON(w, keys[i]) 236 w.WriteString(`": `) 237 w.WriteString(s) 238 continue 239 } 240 241 writeKV(w, keys[i], s) 242 } 243 244 for i := len(row); i < len(keys); i++ { 245 if i > 0 { 246 w.WriteByte(',') 247 
w.WriteByte(' ') 248 } 249 w.WriteByte('"') 250 writeInnerStringJSON(w, keys[i]) 251 w.WriteString(`": null`) 252 } 253 w.WriteByte('}') 254 255 w.WriteByte('\n') 256 if err := w.Flush(); err != nil { 257 return noMoreOutput{} 258 } 259 return nil 260 }) 261 } 262 263 func emitJSONS(w *bufio.Writer, rr *csv.Reader) error { 264 got := 0 265 var keys []string 266 267 err := loopCSV(rr, func(i int, row []string) error { 268 got++ 269 270 if i == 0 { 271 keys = make([]string, 0, len(row)) 272 for _, s := range row { 273 c := string(append([]byte{}, s...)) 274 keys = append(keys, c) 275 } 276 return nil 277 } 278 279 if i == 1 { 280 w.WriteByte('[') 281 } else { 282 err := w.WriteByte(',') 283 if err != nil { 284 return noMoreOutput{} 285 } 286 } 287 288 w.WriteByte('{') 289 for i, s := range row { 290 if i > 0 { 291 w.WriteByte(',') 292 } 293 writeKV(w, keys[i], s) 294 } 295 296 for i := len(row); i < len(keys); i++ { 297 if i > 0 { 298 w.WriteByte(',') 299 } 300 w.WriteByte('"') 301 writeInnerStringJSON(w, keys[i]) 302 w.WriteString(`":null`) 303 } 304 w.WriteByte('}') 305 306 return nil 307 }) 308 309 if err != nil { 310 return err 311 } 312 313 if got > 1 { 314 w.WriteString("]\n") 315 } 316 return nil 317 } 318 319 func emitTSV(w *bufio.Writer, rr *csv.Reader) error { 320 width := -1 321 322 return loopCSV(rr, func(i int, row []string) error { 323 if width < 0 { 324 width = len(row) 325 } 326 327 for i, s := range row { 328 if strings.IndexByte(s, '\t') >= 0 { 329 const msg = `can't convert CSV whose items have tabs to TSV` 330 return errors.New(msg) 331 } 332 if i > 0 { 333 w.WriteByte('\t') 334 } 335 w.WriteString(s) 336 } 337 338 for i := len(row); i < width; i++ { 339 w.WriteByte('\t') 340 } 341 342 w.WriteByte('\n') 343 if err := w.Flush(); err != nil { 344 // a write error may be the consequence of stdout being closed, 345 // perhaps by another app along a pipe 346 return noMoreOutput{} 347 } 348 return nil 349 }) 350 } 351 352 // writeInnerStringJSON helps 
// JSON-encode strings more quickly.
//
// writeInnerStringJSON writes string s as the inside of a JSON string, which
// means the surrounding double-quotes are left for callers to write. Strings
// made only of letters, spaces, and ASCII symbols which need no escaping are
// written as given, while all other strings are escaped via json.Marshal.
func writeInnerStringJSON(w *bufio.Writer, s string) {
	needsEscaping := false
	for _, r := range s {
		// the range from '#' to '~' has all printable ASCII symbols after
		// the double-quote, which is excluded along with the backslash
		if '#' <= r && r <= '~' && r != '\\' {
			continue
		}
		if r == ' ' || r == '!' || unicode.IsLetter(r) {
			continue
		}

		needsEscaping = true
		break
	}

	if !needsEscaping {
		w.WriteString(s)
		return
	}

	outer, err := json.Marshal(s)
	if err != nil {
		return
	}

	// drop the double-quotes surrounding the marshaled result
	inner := outer[1 : len(outer)-1]
	w.Write(inner)
}

// writeKV writes a JSON key-value pair, where the value is always a string:
// the result looks like "key": "value", with a space after the colon
func writeKV(w *bufio.Writer, k string, s string) {
	w.WriteByte('"')
	writeInnerStringJSON(w, k)
	w.WriteString(`": "`)
	writeInnerStringJSON(w, s)
	w.WriteByte('"')
}

// numberLike checks whether a string can be used verbatim as a JSON number:
// it accepts an optional leading minus sign, followed by 1 or more digits,
// optionally followed by a dot and 1 or more digits. Exponents and leading
// plus signs are never accepted, and neither are leading zeros in the
// integer part, since JSON numbers don't allow those.
func numberLike(s string) bool {
	if len(s) > 0 && s[0] == '-' {
		s = s[1:]
	}

	if len(s) == 0 || s[0] < '0' || s[0] > '9' {
		return false
	}

	// a leading zero is only valid either by itself, or right before a dot
	if s[0] == '0' && len(s) > 1 && s[1] != '.' {
		return false
	}

	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '.' {
			return allDigits(s[i+1:])
		}
		if c < '0' || c > '9' {
			return false
		}
	}

	return true
}

// allDigits checks whether a string is non-empty and made only of the ASCII
// digits from 0 to 9
func allDigits(s string) bool {
	if len(s) == 0 {
		return false
	}

	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}

// makeRowReader makes a lenient CSV reader: it tolerates stray double-quotes,
// lets rows have any number of items, and reuses the slice it returns rows
// with, which means rows must be copied when kept after later reads
func makeRowReader(r io.Reader) *csv.Reader {
	rr := csv.NewReader(r)
	rr.LazyQuotes = true
	rr.ReuseRecord = true
	rr.FieldsPerRecord = -1
	return rr
}

// loopCSV calls the func given for each row read, along with the row's
// 0-based index, until the input ends: it stops at the first error, whether
// that comes from reading, from the func given, or from a row having more
// items than the first one, which is assumed to be the header. Rows are
// allowed to have fewer items than the header.
func loopCSV(rr *csv.Reader, handle func(i int, row []string) error) error {
	width := 0

	for i := 0; ; i++ {
		row, err := rr.Read()
		if err == io.EOF {
			return nil
		}

		if err != nil {
			return err
		}

		if i == 0 {
			width = len(row)
		}

		if len(row) > width {
			return errors.New(`data-row has more items than the header`)
		}

		if err := handle(i, row); err != nil {
			return err
		}
	}
}