/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath dejsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
	"strings"
)

// info is the help message; its leading line-feed is skipped when shown
const info = `
dejsonl [filepath...]

Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is
simply plain-text lines, where each line is valid JSON on its own.
`

// indent is what precedes each item in the output arrays
//
// NOTE(review): the exact whitespace inside this literal could not be
// recovered from the flattened copy of this file under review: confirm
// its width against the original source
const indent = ` `

// main handles the leading options, picks between live (flush after each
// line) and buffered output, and exits with a failure code on errors other
// than io.EOF, which the rest of the code uses to mean `quit quietly`
func main() {
	buffered := false
	args := os.Args[1:]

	if len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	// a seekable stdout isn't a pipe/terminal, so there's no need to
	// flush after each line in that case
	liveLines := !buffered
	if !buffered {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	// use the leftover args, not os.Args[1:], so any leading options and
	// the `--` separator aren't mistaken for file names
	err := run(os.Stdout, args, liveLines)
	if err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// run converts all named inputs, in order, each into its own array; a
// dash stands for stdin, which is also the input when no names are given;
// output is buffered and flushed when the func returns
func run(w io.Writer, args []string, live bool) error {
	dashes := 0
	for _, path := range args {
		if path == `-` {
			dashes++
		}
		if dashes > 1 {
			return errors.New(`can't read stdin (dash) more than once`)
		}
	}

	bw := bufio.NewWriter(w)
	defer bw.Flush()

	if len(args) == 0 {
		return dejsonl(bw, os.Stdin, live)
	}

	for _, path := range args {
		if err := handleInput(bw, path, live); err != nil {
			return err
		}
	}

	return nil
}

// handleInput simplifies control-flow for func run: it converts either
// stdin (when given a dash) or the file with the name given
func handleInput(w *bufio.Writer, path string, live bool) error {
	if path == `-` {
		return dejsonl(w, os.Stdin, live)
	}

	// if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
	// 	resp, err := http.Get(path)
	// 	if err != nil {
	// 		return err
	// 	}
	// 	defer resp.Body.Close()
	// 	return dejsonl(w, resp.Body, live)
	// }

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return dejsonl(w, f, live)
}

// dejsonl simplifies control-flow for func handleInput: it reads lines
// from r, checks each non-empty non-comment line is valid JSON, and emits
// all such lines verbatim as the items of a single JSON array; when live
// is true, output is flushed after each item; an io.EOF result means the
// output can't be written to anymore, which callers treat as a quiet quit
func dejsonl(w *bufio.Writer, r io.Reader, live bool) error {
	const gb = 1024 * 1024 * 1024
	const maxInt = int64(^uint(0) >> 1)

	// allow lines up to 8 GiB, clamping that to what an int can hold, so
	// the code can also compile for 32-bit platforms
	maxLine := int64(8 * gb)
	if maxLine > maxInt {
		maxLine = maxInt
	}

	sc := bufio.NewScanner(r)
	sc.Buffer(nil, int(maxLine))
	got := 0

	for i := 0; sc.Scan(); i++ {
		s := sc.Text()
		if i == 0 {
			// ignore a leading UTF-8 BOM on the first line
			s = strings.TrimPrefix(s, "\xef\xbb\xbf")
		}

		// trim JSON-insignificant whitespace at both ends of the current
		// line, so tab-only lines and indented comments are ignored too
		s = strings.Trim(s, " \t\r")

		// ignore empty(ish) lines, and lines starting with unix-style comments
		if len(s) == 0 || s[0] == '#' {
			continue
		}

		if err := checkJSONL(strings.NewReader(s)); err != nil {
			return err
		}

		if got == 0 {
			w.WriteByte('[')
		} else {
			w.WriteByte(',')
		}
		// write errors are sticky in buffered writers, so checking this
		// write also covers the one right before it
		if w.WriteByte('\n') != nil {
			return io.EOF
		}
		w.WriteString(indent)
		w.WriteString(s)
		got++

		if !live {
			continue
		}

		if err := w.Flush(); err != nil {
			return io.EOF
		}
	}

	if got == 0 {
		w.WriteString("[\n]\n")
	} else {
		w.WriteString("\n]\n")
	}
	return sc.Err()
}

// checkJSONL ensures the reader given has exactly 1 valid JSON value, and
// nothing else after it, except for optional trailing whitespace
func checkJSONL(r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		return errors.New(`input has no JSON values`)
	}
	if err != nil {
		// don't mistake the nil token of a failed read for a JSON null
		return err
	}

	if err := checkToken(dec, t); err != nil {
		return err
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// checkToken handles recursion for func checkJSONL: scalar tokens are
// valid right away, while array/object openers have all their items
// checked, up to and including their matching closers
func checkToken(dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return checkArray(dec)
		case json.Delim('{'):
			return checkObject(dec)
		default:
			return errors.New(`unsupported JSON syntax ` + t.String())
		}

	case nil, bool, float64, json.Number, string:
		return nil

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// checkArray handles arrays for func checkToken: it checks all items
// until the array's closing bracket
func checkArray(dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err == io.EOF {
			// input ended before the array did: a plain io.EOF would
			// be handled as a quiet quit, hiding the syntax error
			return io.ErrUnexpectedEOF
		}
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}

// checkObject handles objects for func checkToken: it checks all
// key-value pairs until the object's closing brace
func checkObject(dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err == io.EOF {
			// input ended before the object did: a plain io.EOF would
			// be handled as a quiet quit, hiding the syntax error
			return io.ErrUnexpectedEOF
		}
		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			return nil
		}

		if _, ok := t.(string); !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		t, err = dec.Token()
		if err == io.EOF || t == json.Delim('}') {
			return errors.New(`expected a value for a key-value pair`)
		}
		if err != nil {
			// don't mistake the nil token of a failed read for a JSON null
			return err
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}