File: decsv.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath decsv.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "encoding/csv" 37 "encoding/json" 38 "errors" 39 "io" 40 "os" 41 "strings" 42 "unicode" 43 ) 44 45 const info = ` 46 decsv [options...] [filepath...] 47 48 49 This cmd-line app turns CSV (comma-separated values) data into either TSV 50 (tab-separated values), JSONS (JSON Strings), or general JSON (JavaScript 51 Object Notation). 52 53 When not given a filepath, the input is read from the standard input. 
54 55 Options, when given, can either start with a single or a double-dash: 56 57 -h show this help message 58 -help show this help message 59 60 -json emit JSON, where numbers are auto-detected 61 -jsonl emit JSON Lines, where numbers are auto-detected 62 -jsons emit JSON Strings, where object values are strings or null 63 -tsv emit TSV (tab-separated values) lines 64 ` 65 66 // noMoreOutput is a custom error-type meant to be deliberately ignored 67 type noMoreOutput struct{} 68 69 func (nmo noMoreOutput) Error() string { 70 return `no more output` 71 } 72 73 // handler is the type all CSV-converter funcs adhere to 74 type handler func(*bufio.Writer, *csv.Reader) error 75 76 var handlers = map[string]handler{ 77 `-json`: emitJSON, 78 `--json`: emitJSON, 79 `-jsonl`: emitJSONL, 80 `--jsonl`: emitJSONL, 81 `-jsons`: emitJSONS, 82 `--jsons`: emitJSONS, 83 `-tsv`: emitTSV, 84 `--tsv`: emitTSV, 85 } 86 87 func main() { 88 if len(os.Args) > 1 { 89 switch os.Args[1] { 90 case `-h`, `--h`, `-help`, `--help`: 91 os.Stdout.WriteString(info[1:]) 92 return 93 } 94 } 95 96 args := os.Args[1:] 97 emit := emitTSV 98 if len(args) > 0 { 99 if v, ok := handlers[args[0]]; ok { 100 emit = v 101 args = args[1:] 102 } 103 } 104 105 if len(args) > 1 { 106 os.Stdout.WriteString(info[1:]) 107 os.Exit(1) 108 } 109 110 path := `-` 111 if len(args) > 0 { 112 path = args[0] 113 } 114 115 err := handleInput(os.Stdout, os.Stdin, path, emit) 116 if _, ok := err.(noMoreOutput); ok { 117 return 118 } 119 120 if err != nil { 121 os.Stderr.WriteString("\x1b[31m") 122 os.Stderr.WriteString(err.Error()) 123 os.Stderr.WriteString("\x1b[0m\n") 124 os.Exit(1) 125 } 126 } 127 128 func handleInput(w io.Writer, r io.Reader, path string, handle handler) error { 129 bw := bufio.NewWriter(w) 130 defer bw.Flush() 131 132 if path == `-` { 133 return handle(bw, makeRowReader(r)) 134 } 135 136 f, err := os.Open(path) 137 if err != nil { 138 // on windows, file-not-found error messages may mention `CreateFile`, 139 
// even when trying to open files in read-only mode 140 return errors.New(`can't open file named ` + path) 141 } 142 defer f.Close() 143 144 return handle(bw, makeRowReader(f)) 145 } 146 147 func emitJSON(w *bufio.Writer, rr *csv.Reader) error { 148 got := 0 149 var keys []string 150 151 err := loopCSV(rr, func(i int, row []string) error { 152 got++ 153 154 if i == 0 { 155 keys = make([]string, 0, len(row)) 156 for _, s := range row { 157 keys = append(keys, strings.Clone(s)) 158 } 159 return nil 160 } 161 162 if i == 1 { 163 w.WriteByte('[') 164 } else { 165 err := w.WriteByte(',') 166 if err != nil { 167 return noMoreOutput{} 168 } 169 } 170 171 w.WriteByte('{') 172 for i, s := range row { 173 if i > 0 { 174 w.WriteByte(',') 175 } 176 177 if numberLike(s) { 178 w.WriteByte('"') 179 writeInnerStringJSON(w, keys[i]) 180 w.WriteString(`":`) 181 w.WriteString(s) 182 continue 183 } 184 185 writeKV(w, keys[i], s) 186 } 187 188 for i := len(row); i < len(keys); i++ { 189 if i > 0 { 190 w.WriteByte(',') 191 } 192 w.WriteByte('"') 193 writeInnerStringJSON(w, keys[i]) 194 w.WriteString(`":null`) 195 } 196 w.WriteByte('}') 197 198 return nil 199 }) 200 201 if err != nil { 202 return err 203 } 204 205 if got > 1 { 206 w.WriteString("]\n") 207 } 208 return nil 209 } 210 211 func emitJSONL(w *bufio.Writer, rr *csv.Reader) error { 212 var keys []string 213 214 return loopCSV(rr, func(i int, row []string) error { 215 if i == 0 { 216 keys = make([]string, 0, len(row)) 217 for _, s := range row { 218 c := string(append([]byte{}, s...)) 219 keys = append(keys, c) 220 } 221 return nil 222 } 223 224 w.WriteByte('{') 225 for i, s := range row { 226 if i > 0 { 227 w.WriteByte(',') 228 w.WriteByte(' ') 229 } 230 231 if numberLike(s) { 232 w.WriteByte('"') 233 writeInnerStringJSON(w, keys[i]) 234 w.WriteString(`": `) 235 w.WriteString(s) 236 continue 237 } 238 239 writeKV(w, keys[i], s) 240 } 241 242 for i := len(row); i < len(keys); i++ { 243 if i > 0 { 244 w.WriteByte(',') 245 
w.WriteByte(' ') 246 } 247 w.WriteByte('"') 248 writeInnerStringJSON(w, keys[i]) 249 w.WriteString(`": null`) 250 } 251 w.WriteByte('}') 252 253 w.WriteByte('\n') 254 if err := w.Flush(); err != nil { 255 return noMoreOutput{} 256 } 257 return nil 258 }) 259 } 260 261 func emitJSONS(w *bufio.Writer, rr *csv.Reader) error { 262 got := 0 263 var keys []string 264 265 err := loopCSV(rr, func(i int, row []string) error { 266 got++ 267 268 if i == 0 { 269 keys = make([]string, 0, len(row)) 270 for _, s := range row { 271 c := string(append([]byte{}, s...)) 272 keys = append(keys, c) 273 } 274 return nil 275 } 276 277 if i == 1 { 278 w.WriteByte('[') 279 } else { 280 err := w.WriteByte(',') 281 if err != nil { 282 return noMoreOutput{} 283 } 284 } 285 286 w.WriteByte('{') 287 for i, s := range row { 288 if i > 0 { 289 w.WriteByte(',') 290 } 291 writeKV(w, keys[i], s) 292 } 293 294 for i := len(row); i < len(keys); i++ { 295 if i > 0 { 296 w.WriteByte(',') 297 } 298 w.WriteByte('"') 299 writeInnerStringJSON(w, keys[i]) 300 w.WriteString(`":null`) 301 } 302 w.WriteByte('}') 303 304 return nil 305 }) 306 307 if err != nil { 308 return err 309 } 310 311 if got > 1 { 312 w.WriteString("]\n") 313 } 314 return nil 315 } 316 317 func emitTSV(w *bufio.Writer, rr *csv.Reader) error { 318 width := -1 319 320 return loopCSV(rr, func(i int, row []string) error { 321 if width < 0 { 322 width = len(row) 323 } 324 325 for i, s := range row { 326 if strings.IndexByte(s, '\t') >= 0 { 327 const msg = `can't convert CSV whose items have tabs to TSV` 328 return errors.New(msg) 329 } 330 if i > 0 { 331 w.WriteByte('\t') 332 } 333 w.WriteString(s) 334 } 335 336 for i := len(row); i < width; i++ { 337 w.WriteByte('\t') 338 } 339 340 w.WriteByte('\n') 341 if err := w.Flush(); err != nil { 342 // a write error may be the consequence of stdout being closed, 343 // perhaps by another app along a pipe 344 return noMoreOutput{} 345 } 346 return nil 347 }) 348 } 349 350 // writeInnerStringJSON helps 
// JSON-encode strings more quickly, by writing them as they are when none
// of their runes need escaping: the surrounding double-quotes are never
// emitted, as those are left to callers
func writeInnerStringJSON(w *bufio.Writer, s string) {
	needsEscaping := false
	for _, r := range s {
		// printable ASCII past the double-quote is safe, except backslashes
		if '#' <= r && r <= '~' && r != '\\' {
			continue
		}
		if r == ' ' || r == '!' || unicode.IsLetter(r) {
			continue
		}

		needsEscaping = true
		break
	}

	if !needsEscaping {
		w.WriteString(s)
		return
	}

	// the slow path also escapes symbols <, >, and &, which is how func
	// json.Marshal works: the result is equivalent JSON either way
	outer, err := json.Marshal(s)
	if err != nil {
		return
	}

	// skip the double-quotes surrounding the result
	inner := outer[1 : len(outer)-1]
	w.Write(inner)
}

// writeKV emits a JSON key-value pair, where the value is always a string:
// a space follows the colon between the 2 strings
func writeKV(w *bufio.Writer, k string, s string) {
	w.WriteByte('"')
	writeInnerStringJSON(w, k)
	w.WriteString(`": "`)
	writeInnerStringJSON(w, s)
	w.WriteByte('"')
}

// numberLike checks if a string can be used verbatim as a JSON number: it
// accepts an optional leading minus sign, followed by 1 or more digits,
// optionally followed by a dot and 1 or more digits; exponents aren't
// supported, and neither are multi-digit integer parts starting with a
// zero, since JSON numbers can't have leading zeros
func numberLike(s string) bool {
	if len(s) > 0 && s[0] == '-' {
		s = s[1:]
	}

	// find how many digits the integer part has
	n := 0
	for n < len(s) && '0' <= s[n] && s[n] <= '9' {
		n++
	}

	if n == 0 {
		return false
	}

	// values like `007` aren't valid JSON numbers, so those stay strings
	if n > 1 && s[0] == '0' {
		return false
	}

	if n == len(s) {
		return true
	}
	return s[n] == '.' && allDigits(s[n+1:])
}

// allDigits checks if a non-empty string only has ASCII digits in it
func allDigits(s string) bool {
	if len(s) == 0 {
		return false
	}

	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}

// makeRowReader sets up a lenient CSV reader: quotes are handled lazily,
// rows can have any number of items, and the record slice is reused across
// reads, which means rows must be copied to outlive the next read
func makeRowReader(r io.Reader) *csv.Reader {
	rr := csv.NewReader(r)
	rr.LazyQuotes = true
	rr.ReuseRecord = true
	rr.FieldsPerRecord = -1
	return rr
}

// loopCSV calls the func given for each row read, along with the 0-based
// index of the row: it stops at the first error, either from reading rows
// or from the callback, and fails when any row has more items than the
// first one; reaching the end of the input normally results in a nil error
func loopCSV(rr *csv.Reader, handle func(i int, row []string) error) error {
	width := 0

	for i := 0; ; i++ {
		row, err := rr.Read()
		if errors.Is(err, io.EOF) {
			return nil
		}

		if err != nil {
			return err
		}

		// the first row, presumably the header, sets the max row width
		if i == 0 {
			width = len(row)
		}

		if len(row) > width {
			return errors.New(`data-row has more items than the header`)
		}

		if err := handle(i, row); err != nil {
			return err
		}
	}
}