File: dejsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath dejsonl.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40     "strings"
  41 )
  42 
  43 const info = `
  44 dejsonl [filepath...]
  45 
  46 Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is
  47 simply plain-text lines, where each line is valid JSON on its own.
  48 `
  49 
  50 const indent = `  `
  51 
  52 // errNoMoreOutput is a generic dummy output-error, which is meant to be
  53 // ultimately ignored, being just an excuse to quit the app immediately
  54 // and successfully
  55 var errNoMoreOutput = errors.New(`no more output`)
  56 
  57 func main() {
  58     buffered := false
  59     args := os.Args[1:]
  60 
  61     if len(args) > 0 {
  62         switch args[0] {
  63         case `-b`, `--b`, `-buffered`, `--buffered`:
  64             buffered = true
  65             args = args[1:]
  66 
  67         case `-h`, `--h`, `-help`, `--help`:
  68             os.Stdout.WriteString(info[1:])
  69             return
  70         }
  71     }
  72 
  73     if len(args) > 0 && args[0] == `--` {
  74         args = args[1:]
  75     }
  76 
  77     liveLines := !buffered
  78     if !buffered {
  79         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
  80             liveLines = false
  81         }
  82     }
  83 
  84     err := run(os.Stdout, os.Args[1:], liveLines)
  85     if err != nil && err != io.EOF && err != errNoMoreOutput {
  86         os.Stderr.WriteString(err.Error())
  87         os.Stderr.WriteString("\n")
  88         os.Exit(1)
  89     }
  90 }
  91 
  92 func run(w io.Writer, args []string, live bool) error {
  93     dashes := 0
  94     for _, path := range args {
  95         if path == `-` {
  96             dashes++
  97         }
  98         if dashes > 1 {
  99             return errors.New(`can't read stdin (dash) more than once`)
 100         }
 101     }
 102 
 103     bw := bufio.NewWriter(w)
 104     defer bw.Flush()
 105 
 106     if len(args) == 0 {
 107         return dejsonl(bw, os.Stdin, live)
 108     }
 109 
 110     for _, path := range args {
 111         if err := handleInput(bw, path, live); err != nil {
 112             return err
 113         }
 114     }
 115 
 116     return nil
 117 }
 118 
 119 // handleInput simplifies control-flow for func main
 120 func handleInput(w *bufio.Writer, path string, live bool) error {
 121     if path == `-` {
 122         return dejsonl(w, os.Stdin, live)
 123     }
 124 
 125     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
 126     //  resp, err := http.Get(path)
 127     //  if err != nil {
 128     //      return err
 129     //  }
 130     //  defer resp.Body.Close()
 131     //  return dejsonl(w, resp.Body, live)
 132     // }
 133 
 134     f, err := os.Open(path)
 135     if err != nil {
 136         // on windows, file-not-found error messages may mention `CreateFile`,
 137         // even when trying to open files in read-only mode
 138         return errors.New(`can't open file named ` + path)
 139     }
 140     defer f.Close()
 141     return dejsonl(w, f, live)
 142 }
 143 
 144 // dejsonl simplifies control-flow for func handleInput
 145 func dejsonl(w *bufio.Writer, r io.Reader, live bool) error {
 146     const gb = 1024 * 1024 * 1024
 147     sc := bufio.NewScanner(r)
 148     sc.Buffer(nil, 8*gb)
 149     got := 0
 150 
 151     for i := 0; sc.Scan(); i++ {
 152         s := sc.Text()
 153         if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") {
 154             s = s[3:]
 155         }
 156 
 157         // trim spaces at both ends of the current line
 158         for len(s) > 0 && s[0] == ' ' {
 159             s = s[1:]
 160         }
 161         for len(s) > 0 && s[len(s)-1] == ' ' {
 162             s = s[:len(s)-1]
 163         }
 164 
 165         // ignore empty(ish) lines
 166         if len(s) == 0 {
 167             continue
 168         }
 169 
 170         // ignore lines starting with unix-style comments
 171         if len(s) > 0 && s[0] == '#' {
 172             continue
 173         }
 174 
 175         if err := checkJSONL(strings.NewReader(s)); err != nil {
 176             return err
 177         }
 178 
 179         if got == 0 {
 180             w.WriteByte('[')
 181         } else {
 182             w.WriteByte(',')
 183         }
 184         if w.WriteByte('\n') != nil {
 185             return errNoMoreOutput
 186         }
 187         w.WriteString(indent)
 188         w.WriteString(s)
 189         got++
 190 
 191         if !live {
 192             continue
 193         }
 194 
 195         if err := w.Flush(); err != nil {
 196             return errNoMoreOutput
 197         }
 198     }
 199 
 200     if got == 0 {
 201         w.WriteString("[\n]\n")
 202     } else {
 203         w.WriteString("\n]\n")
 204     }
 205     return sc.Err()
 206 }
 207 
 208 func checkJSONL(r io.Reader) error {
 209     dec := json.NewDecoder(r)
 210     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 211     // even if JSON parsers aren't required to guarantee such input-fidelity
 212     // for numbers
 213     dec.UseNumber()
 214 
 215     t, err := dec.Token()
 216     if err == io.EOF {
 217         return errors.New(`input has no JSON values`)
 218     }
 219 
 220     if err := checkToken(dec, t); err != nil {
 221         return err
 222     }
 223 
 224     _, err = dec.Token()
 225     if err == io.EOF {
 226         // input is over, so it's a success
 227         return nil
 228     }
 229 
 230     if err == nil {
 231         // a successful `read` is a failure, as it means there are
 232         // trailing JSON tokens
 233         return errors.New(`unexpected trailing data`)
 234     }
 235 
 236     // any other error, perhaps some invalid-JSON-syntax-type error
 237     return err
 238 }
 239 
 240 // checkToken handles recursion for func checkJSONL
 241 func checkToken(dec *json.Decoder, t json.Token) error {
 242     switch t := t.(type) {
 243     case json.Delim:
 244         switch t {
 245         case json.Delim('['):
 246             return checkArray(dec)
 247         case json.Delim('{'):
 248             return checkObject(dec)
 249         default:
 250             return errors.New(`unsupported JSON syntax ` + string(t))
 251         }
 252 
 253     case nil, bool, float64, json.Number, string:
 254         return nil
 255 
 256     default:
 257         // return fmt.Errorf(`unsupported token type %T`, t)
 258         return errors.New(`invalid JSON token`)
 259     }
 260 }
 261 
 262 // handleArray handles arrays for func checkToken
 263 func checkArray(dec *json.Decoder) error {
 264     for {
 265         t, err := dec.Token()
 266         if err != nil {
 267             return err
 268         }
 269 
 270         if t == json.Delim(']') {
 271             return nil
 272         }
 273 
 274         if err := checkToken(dec, t); err != nil {
 275             return err
 276         }
 277     }
 278 }
 279 
 280 // handleObject handles objects for func checkToken
 281 func checkObject(dec *json.Decoder) error {
 282     for {
 283         t, err := dec.Token()
 284         if err != nil {
 285             return err
 286         }
 287 
 288         if t == json.Delim('}') {
 289             return nil
 290         }
 291 
 292         if _, ok := t.(string); !ok {
 293             return errors.New(`expected a string for a key-value pair`)
 294         }
 295 
 296         t, err = dec.Token()
 297         if err == io.EOF || t == json.Delim('}') {
 298             return errors.New(`expected a value for a key-value pair`)
 299         }
 300 
 301         if err := checkToken(dec, t); err != nil {
 302             return err
 303         }
 304     }
 305 }