File: dejsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath dejsonl.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40     "strings"
  41 )
  42 
  43 const info = `
  44 dejsonl [filepath...]
  45 
  46 Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is
  47 simply plain-text lines, where each line is valid JSON on its own.
  48 `
  49 
// indent is the leading whitespace written before each array item on output
const indent = `  `

// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully: func main doesn't report it, and doesn't fail the app
// because of it
var errNoMoreOutput = errors.New(`no more output`)
  56 
  57 func main() {
  58     if len(os.Args) > 1 {
  59         switch os.Args[1] {
  60         case `-h`, `--h`, `-help`, `--help`:
  61             os.Stdout.WriteString(info[1:])
  62             return
  63         }
  64     }
  65 
  66     if len(os.Args) > 2 {
  67         os.Stderr.WriteString("multiple inputs not allowed\n")
  68         os.Exit(1)
  69     }
  70 
  71     // figure out whether input should come from a named file or from stdin
  72     path := `-`
  73     if len(os.Args) > 1 {
  74         path = os.Args[1]
  75     }
  76 
  77     liveLines := true
  78     if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
  79         liveLines = false
  80     }
  81 
  82     err := handleInput(os.Stdout, path, liveLines)
  83     if err != nil && err != io.EOF && err != errNoMoreOutput {
  84         os.Stderr.WriteString(err.Error())
  85         os.Stderr.WriteString("\n")
  86         os.Exit(1)
  87     }
  88 }
  89 
  90 // handleInput simplifies control-flow for func main
  91 func handleInput(w io.Writer, path string, live bool) error {
  92     if path == `-` {
  93         return dejsonl(w, os.Stdin, live)
  94     }
  95 
  96     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
  97     //  resp, err := http.Get(path)
  98     //  if err != nil {
  99     //      return err
 100     //  }
 101     //  defer resp.Body.Close()
 102     //  return dejsonl(w, resp.Body, live)
 103     // }
 104 
 105     f, err := os.Open(path)
 106     if err != nil {
 107         // on windows, file-not-found error messages may mention `CreateFile`,
 108         // even when trying to open files in read-only mode
 109         return errors.New(`can't open file named ` + path)
 110     }
 111     defer f.Close()
 112     return dejsonl(w, f, live)
 113 }
 114 
 115 // dejsonl simplifies control-flow for func handleInput
 116 func dejsonl(w io.Writer, r io.Reader, live bool) error {
 117     bw := bufio.NewWriter(w)
 118     defer bw.Flush()
 119 
 120     const gb = 1024 * 1024 * 1024
 121     sc := bufio.NewScanner(r)
 122     sc.Buffer(nil, 8*gb)
 123     got := 0
 124 
 125     for i := 0; sc.Scan(); i++ {
 126         s := sc.Text()
 127         if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") {
 128             s = s[3:]
 129         }
 130 
 131         // trim spaces at both ends of the current line
 132         for len(s) > 0 && s[0] == ' ' {
 133             s = s[1:]
 134         }
 135         for len(s) > 0 && s[len(s)-1] == ' ' {
 136             s = s[:len(s)-1]
 137         }
 138 
 139         // ignore empty(ish) lines
 140         if len(s) == 0 {
 141             continue
 142         }
 143 
 144         // ignore lines starting with unix-style comments
 145         if len(s) > 0 && s[0] == '#' {
 146             continue
 147         }
 148 
 149         if err := checkJSONL(strings.NewReader(s)); err != nil {
 150             return err
 151         }
 152 
 153         if got == 0 {
 154             bw.WriteByte('[')
 155         } else {
 156             bw.WriteByte(',')
 157         }
 158         if bw.WriteByte('\n') != nil {
 159             return errNoMoreOutput
 160         }
 161         bw.WriteString(indent)
 162         bw.WriteString(s)
 163         got++
 164 
 165         if !live {
 166             continue
 167         }
 168 
 169         if err := bw.Flush(); err != nil {
 170             return errNoMoreOutput
 171         }
 172     }
 173 
 174     if got == 0 {
 175         bw.WriteString("[\n]\n")
 176     } else {
 177         bw.WriteString("\n]\n")
 178     }
 179     return sc.Err()
 180 }
 181 
 182 func checkJSONL(r io.Reader) error {
 183     dec := json.NewDecoder(r)
 184     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 185     // even if JSON parsers aren't required to guarantee such input-fidelity
 186     // for numbers
 187     dec.UseNumber()
 188 
 189     t, err := dec.Token()
 190     if err == io.EOF {
 191         return errors.New(`input has no JSON values`)
 192     }
 193 
 194     if err := checkToken(dec, t); err != nil {
 195         return err
 196     }
 197 
 198     _, err = dec.Token()
 199     if err == io.EOF {
 200         // input is over, so it's a success
 201         return nil
 202     }
 203 
 204     if err == nil {
 205         // a successful `read` is a failure, as it means there are
 206         // trailing JSON tokens
 207         return errors.New(`unexpected trailing data`)
 208     }
 209 
 210     // any other error, perhaps some invalid-JSON-syntax-type error
 211     return err
 212 }
 213 
 214 // checkToken handles recursion for func checkJSONL
 215 func checkToken(dec *json.Decoder, t json.Token) error {
 216     switch t := t.(type) {
 217     case json.Delim:
 218         switch t {
 219         case json.Delim('['):
 220             return checkArray(dec)
 221         case json.Delim('{'):
 222             return checkObject(dec)
 223         default:
 224             return errors.New(`unsupported JSON syntax ` + string(t))
 225         }
 226 
 227     case nil, bool, float64, json.Number, string:
 228         return nil
 229 
 230     default:
 231         // return fmt.Errorf(`unsupported token type %T`, t)
 232         return errors.New(`invalid JSON token`)
 233     }
 234 }
 235 
 236 // handleArray handles arrays for func checkToken
 237 func checkArray(dec *json.Decoder) error {
 238     for {
 239         t, err := dec.Token()
 240         if err != nil {
 241             return err
 242         }
 243 
 244         if t == json.Delim(']') {
 245             return nil
 246         }
 247 
 248         if err := checkToken(dec, t); err != nil {
 249             return err
 250         }
 251     }
 252 
 253     // make the compiler happy
 254     return nil
 255 }
 256 
 257 // handleObject handles objects for func checkToken
 258 func checkObject(dec *json.Decoder) error {
 259     for {
 260         t, err := dec.Token()
 261         if err != nil {
 262             return err
 263         }
 264 
 265         if t == json.Delim('}') {
 266             return nil
 267         }
 268 
 269         if _, ok := t.(string); !ok {
 270             return errors.New(`expected a string for a key-value pair`)
 271         }
 272 
 273         t, err = dec.Token()
 274         if err == io.EOF || t == json.Delim('}') {
 275             return errors.New(`expected a value for a key-value pair`)
 276         }
 277 
 278         if err := checkToken(dec, t); err != nil {
 279             return err
 280         }
 281     }
 282 
 283     // make the compiler happy
 284     return nil
 285 }