// File: dejsonl.go

/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath dejsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
	"strings"
)

// info is the help message; its leading newline is skipped when shown
const info = `
dejsonl [filepath...]

Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is
simply plain-text lines, where each line is valid JSON on its own.
`

// indent is what's emitted right before each array item in the output
const indent = ` `

// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully
var errNoMoreOutput = errors.New(`no more output`)

func main() {
	buffered, help, args := parseOptions(os.Args[1:])
	if help {
		os.Stdout.WriteString(info[1:])
		return
	}

	// live mode flushes the output after each item: it's off when asked
	// for explicitly, and also when stdout turns out to be seekable
	// (presumably a regular file, where per-line flushing gains nothing)
	liveLines := !buffered
	if liveLines {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	// use the option-free args: giving all of os.Args[1:] would make the
	// leading option (and/or the `--` after it) be opened as a file
	err := run(os.Stdout, args, liveLines)
	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, errNoMoreOutput) {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// parseOptions handles the leading command-line option for func main: args
// are the command-line arguments, without the app's name. Only the first
// argument is checked as an option, and a `--` right after that (or right
// at the start) is dropped. The paths returned are all remaining arguments;
// when help is true, the other results are zero values.
func parseOptions(args []string) (buffered bool, help bool, paths []string) {
	if len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]

		case `-h`, `--h`, `-help`, `--help`:
			return false, true, nil
		}
	}

	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	return buffered, false, args
}

// run converts all the inputs named in args, writing results to w: no args
// means reading from stdin, which can also be asked for by using a dash as
// a name, but only once. When live is true, output is flushed after each
// array item. The first error from any input stops everything.
func run(w io.Writer, args []string, live bool) error {
	dashes := 0
	for _, path := range args {
		if path == `-` {
			dashes++
		}
		if dashes > 1 {
			return errors.New(`can't read stdin (dash) more than once`)
		}
	}

	bw := bufio.NewWriter(w)
	defer bw.Flush()

	if len(args) == 0 {
		return dejsonl(bw, os.Stdin, live)
	}

	for _, path := range args {
		if err := handleInput(bw, path, live); err != nil {
			return err
		}
	}

	return nil
}

// handleInput simplifies control-flow for func run: a dash for the path
// means stdin, while anything else is opened as a file
func handleInput(w *bufio.Writer, path string, live bool) error {
	if path == `-` {
		return dejsonl(w, os.Stdin, live)
	}

	// if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
	// 	resp, err := http.Get(path)
	// 	if err != nil {
	// 		return err
	// 	}
	// 	defer resp.Body.Close()
	// 	return dejsonl(w, resp.Body, live)
	// }

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return dejsonl(w, f, live)
}

// dejsonl simplifies control-flow for func handleInput: it turns all lines
// from r into the items of a single JSON array written to w. Empty(ish)
// lines and lines starting with `#` are ignored, and a leading UTF-8 BOM
// on the first line is skipped. The first invalid line stops everything
// with an error, without closing the array; output errors are reported as
// errNoMoreOutput. When live is true, w is flushed after each item.
func dejsonl(w *bufio.Writer, r io.Reader, live bool) error {
	const gb = 1024 * 1024 * 1024
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, 8*gb)
	got := 0

	for i := 0; sc.Scan(); i++ {
		s := sc.Text()
		if i == 0 {
			s = strings.TrimPrefix(s, "\xef\xbb\xbf")
		}

		// trim spaces at both ends of the current line
		s = strings.Trim(s, ` `)

		// ignore empty(ish) lines, and lines starting with unix-style comments
		if len(s) == 0 || s[0] == '#' {
			continue
		}

		if err := checkLine(s); err != nil {
			return err
		}

		// errors from a bufio.Writer stick, so checking only the last
		// write before the item also covers the write right before it
		if got == 0 {
			w.WriteByte('[')
		} else {
			w.WriteByte(',')
		}
		if w.WriteByte('\n') != nil {
			return errNoMoreOutput
		}
		w.WriteString(indent)
		w.WriteString(s)
		got++

		if !live {
			continue
		}

		if err := w.Flush(); err != nil {
			return errNoMoreOutput
		}
	}

	if got == 0 {
		w.WriteString("[\n]\n")
	} else {
		w.WriteString("\n]\n")
	}
	return sc.Err()
}

// checkLine checks the string given is exactly 1 valid JSON value
func checkLine(s string) error {
	return checkJSONL(strings.NewReader(s))
}

// checkJSONL checks the reader given has exactly 1 valid JSON value: empty
// input, invalid syntax, values cut short, and trailing data after the
// first value are all errors
func checkJSONL(r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		return errors.New(`input has no JSON values`)
	}
	if err != nil {
		// a failed read gives a nil token, which must not pass for a null
		return err
	}

	if err := checkToken(dec, t); err != nil {
		return err
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// nextToken reads the next token from inside an array/object, where
// running out of input means the value was cut short: a plain io.EOF
// is turned into io.ErrUnexpectedEOF, since func main ignores the former
func nextToken(dec *json.Decoder) (json.Token, error) {
	t, err := dec.Token()
	if err == io.EOF {
		return nil, io.ErrUnexpectedEOF
	}
	return t, err
}

// checkToken handles recursion for func checkJSONL
func checkToken(dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return checkArray(dec)
		case json.Delim('{'):
			return checkObject(dec)
		default:
			return errors.New(`unsupported JSON syntax ` + t.String())
		}

	case nil, bool, float64, json.Number, string:
		return nil

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// checkArray handles arrays for func checkToken: it's called right after
// an array's opening bracket, and checks everything up to its closing one
func checkArray(dec *json.Decoder) error {
	for {
		t, err := nextToken(dec)
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}

// checkObject handles objects for func checkToken: it's called right after
// an object's opening brace, and checks everything up to its closing one
func checkObject(dec *json.Decoder) error {
	for {
		t, err := nextToken(dec)
		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			return nil
		}

		if _, ok := t.(string); !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		t, err = dec.Token()
		if err == io.EOF || t == json.Delim('}') {
			return errors.New(`expected a value for a key-value pair`)
		}
		if err != nil {
			// a failed read gives a nil token, which must not pass for a null
			return err
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}