// File: dejsonl.go

/*
The MIT License (MIT)

Copyright © 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath dejsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
	"strings"
)

// info is the help message; it starts with a line-feed (to keep the source
// tidy), which func main skips when showing it
const info = `
dejsonl [filepath...]

Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is
simply plain-text lines, where each line is valid JSON on its own.
`

// indent is what is emitted in front of each item of the output array
const indent = `  `

// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully
var errNoMoreOutput = errors.New(`no more output`)

// main handles the help options, allows at most one input (a filepath, or
// `-` for stdin, which is also the default), and exits with code 1 after
// showing any error on stderr
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	if len(os.Args) > 2 {
		os.Stderr.WriteString("multiple inputs not allowed\n")
		os.Exit(1)
	}

	// figure out whether input should come from a named file or from stdin
	path := `-`
	if len(os.Args) > 1 {
		path = os.Args[1]
	}

	// when stdout can't seek, output is flushed after each item; a seekable
	// stdout lets output be buffered normally
	liveLines := true
	if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
		liveLines = false
	}

	err := handleInput(os.Stdout, path, liveLines)
	if err != nil && err != io.EOF && err != errNoMoreOutput {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// handleInput simplifies control-flow for func main: a path of `-` means
// read from stdin, while anything else is opened as a file
func handleInput(w io.Writer, path string, live bool) error {
	if path == `-` {
		return dejsonl(w, os.Stdin, live)
	}

	// if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
	// 	resp, err := http.Get(path)
	// 	if err != nil {
	// 		return err
	// 	}
	// 	defer resp.Body.Close()
	// 	return dejsonl(w, resp.Body, live)
	// }

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return dejsonl(w, f, live)
}

// dejsonl simplifies control-flow for func handleInput: it reads lines from
// r, checks each non-empty non-comment line is valid JSON on its own, and
// writes all such lines to w as the items of a single JSON array.
//
// When live is true, output is flushed after each item. The result is the
// first validation error, errNoMoreOutput when writing fails, or any error
// from scanning the input.
func dejsonl(w io.Writer, r io.Reader, live bool) error {
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	const gb = 1024 * 1024 * 1024
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, 8*gb)
	got := 0

	for i := 0; sc.Scan(); i++ {
		s := sc.Text()
		if i == 0 {
			// ignore a leading UTF-8 byte-order mark
			s = strings.TrimPrefix(s, "\xef\xbb\xbf")
		}

		// trim JSON whitespace at both ends of the current line: handling
		// tabs too lets tab-only lines and tab-indented comments be ignored
		s = strings.Trim(s, " \t\r")

		// ignore empty(ish) lines
		if len(s) == 0 {
			continue
		}

		// ignore lines starting with unix-style comments
		if s[0] == '#' {
			continue
		}

		if err := checkLine(s); err != nil {
			return err
		}

		if got == 0 {
			bw.WriteByte('[')
		} else {
			bw.WriteByte(',')
		}
		// errors from a bufio.Writer are sticky, so checking just this
		// write also detects failures from the ones right before it
		if bw.WriteByte('\n') != nil {
			return errNoMoreOutput
		}
		bw.WriteString(indent)
		bw.WriteString(s)
		got++

		if !live {
			continue
		}

		if err := bw.Flush(); err != nil {
			return errNoMoreOutput
		}
	}

	if got == 0 {
		bw.WriteString("[\n]\n")
	} else {
		bw.WriteString("\n]\n")
	}
	return sc.Err()
}

// checkLine checks that the string given is exactly one valid JSON value,
// using func checkJSONL
func checkLine(s string) error {
	return checkJSONL(strings.NewReader(s))
}

// checkJSONL checks that the reader given has exactly one valid JSON value:
// having no values at all, incomplete values, and trailing values are all
// errors
func checkJSONL(r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		return errors.New(`input has no JSON values`)
	}
	if err != nil {
		// a failed read gives a nil token, which must not be mistaken
		// for a valid JSON null
		return err
	}

	if err := checkToken(dec, t); err != nil {
		return err
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// checkToken handles recursion for func checkJSONL
func checkToken(dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return checkArray(dec)
		case json.Delim('{'):
			return checkObject(dec)
		default:
			return errors.New(`unsupported JSON syntax ` + t.String())
		}

	case nil, bool, float64, json.Number, string:
		return nil

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// unexpectedEOF turns io.EOF into io.ErrUnexpectedEOF, leaving all other
// errors as given: the decoder gives a plain io.EOF even when input ends
// inside arrays/objects, which func main would ignore as a normal ending
func unexpectedEOF(err error) error {
	if err == io.EOF {
		return io.ErrUnexpectedEOF
	}
	return err
}

// checkArray handles arrays for func checkToken
func checkArray(dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err != nil {
			return unexpectedEOF(err)
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}

// checkObject handles objects for func checkToken
func checkObject(dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err != nil {
			return unexpectedEOF(err)
		}

		if t == json.Delim('}') {
			return nil
		}

		if _, ok := t.(string); !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		t, err = dec.Token()
		if err == io.EOF || t == json.Delim('}') {
			return errors.New(`expected a value for a key-value pair`)
		}
		if err != nil {
			// a failed read gives a nil token, which must not be mistaken
			// for a valid JSON null
			return err
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}