File: dejsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath dejsonl.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40     "strings"
  41 )
  42 
// info is the help message shown by the help options; func main skips its
// first byte, the line-feed right after the opening backtick
const info = `
dejsonl [filepath...]

Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is
simply plain-text lines, where each line is valid JSON on its own.
`

// indent is the text emitted right before each item of an output array
const indent = `  `
  51 
  52 func main() {
  53     buffered := false
  54     args := os.Args[1:]
  55 
  56     if len(args) > 0 {
  57         switch args[0] {
  58         case `-b`, `--b`, `-buffered`, `--buffered`:
  59             buffered = true
  60             args = args[1:]
  61 
  62         case `-h`, `--h`, `-help`, `--help`:
  63             os.Stdout.WriteString(info[1:])
  64             return
  65         }
  66     }
  67 
  68     if len(args) > 0 && args[0] == `--` {
  69         args = args[1:]
  70     }
  71 
  72     liveLines := !buffered
  73     if !buffered {
  74         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
  75             liveLines = false
  76         }
  77     }
  78 
  79     err := run(os.Stdout, os.Args[1:], liveLines)
  80     if err != nil && err != io.EOF {
  81         os.Stderr.WriteString(err.Error())
  82         os.Stderr.WriteString("\n")
  83         os.Exit(1)
  84     }
  85 }
  86 
  87 func run(w io.Writer, args []string, live bool) error {
  88     dashes := 0
  89     for _, path := range args {
  90         if path == `-` {
  91             dashes++
  92         }
  93         if dashes > 1 {
  94             return errors.New(`can't read stdin (dash) more than once`)
  95         }
  96     }
  97 
  98     bw := bufio.NewWriter(w)
  99     defer bw.Flush()
 100 
 101     if len(args) == 0 {
 102         return dejsonl(bw, os.Stdin, live)
 103     }
 104 
 105     for _, path := range args {
 106         if err := handleInput(bw, path, live); err != nil {
 107             return err
 108         }
 109     }
 110 
 111     return nil
 112 }
 113 
 114 // handleInput simplifies control-flow for func main
 115 func handleInput(w *bufio.Writer, path string, live bool) error {
 116     if path == `-` {
 117         return dejsonl(w, os.Stdin, live)
 118     }
 119 
 120     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
 121     //  resp, err := http.Get(path)
 122     //  if err != nil {
 123     //      return err
 124     //  }
 125     //  defer resp.Body.Close()
 126     //  return dejsonl(w, resp.Body, live)
 127     // }
 128 
 129     f, err := os.Open(path)
 130     if err != nil {
 131         // on windows, file-not-found error messages may mention `CreateFile`,
 132         // even when trying to open files in read-only mode
 133         return errors.New(`can't open file named ` + path)
 134     }
 135     defer f.Close()
 136     return dejsonl(w, f, live)
 137 }
 138 
 139 // dejsonl simplifies control-flow for func handleInput
 140 func dejsonl(w *bufio.Writer, r io.Reader, live bool) error {
 141     const gb = 1024 * 1024 * 1024
 142     sc := bufio.NewScanner(r)
 143     sc.Buffer(nil, 8*gb)
 144     got := 0
 145 
 146     for i := 0; sc.Scan(); i++ {
 147         s := sc.Text()
 148         if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") {
 149             s = s[3:]
 150         }
 151 
 152         // trim spaces at both ends of the current line
 153         for len(s) > 0 && s[0] == ' ' {
 154             s = s[1:]
 155         }
 156         for len(s) > 0 && s[len(s)-1] == ' ' {
 157             s = s[:len(s)-1]
 158         }
 159 
 160         // ignore empty(ish) lines
 161         if len(s) == 0 {
 162             continue
 163         }
 164 
 165         // ignore lines starting with unix-style comments
 166         if len(s) > 0 && s[0] == '#' {
 167             continue
 168         }
 169 
 170         if err := checkJSONL(strings.NewReader(s)); err != nil {
 171             return err
 172         }
 173 
 174         if got == 0 {
 175             w.WriteByte('[')
 176         } else {
 177             w.WriteByte(',')
 178         }
 179         if w.WriteByte('\n') != nil {
 180             return io.EOF
 181         }
 182         w.WriteString(indent)
 183         w.WriteString(s)
 184         got++
 185 
 186         if !live {
 187             continue
 188         }
 189 
 190         if err := w.Flush(); err != nil {
 191             return io.EOF
 192         }
 193     }
 194 
 195     if got == 0 {
 196         w.WriteString("[\n]\n")
 197     } else {
 198         w.WriteString("\n]\n")
 199     }
 200     return sc.Err()
 201 }
 202 
 203 func checkJSONL(r io.Reader) error {
 204     dec := json.NewDecoder(r)
 205     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 206     // even if JSON parsers aren't required to guarantee such input-fidelity
 207     // for numbers
 208     dec.UseNumber()
 209 
 210     t, err := dec.Token()
 211     if err == io.EOF {
 212         return errors.New(`input has no JSON values`)
 213     }
 214 
 215     if err := checkToken(dec, t); err != nil {
 216         return err
 217     }
 218 
 219     _, err = dec.Token()
 220     if err == io.EOF {
 221         // input is over, so it's a success
 222         return nil
 223     }
 224 
 225     if err == nil {
 226         // a successful `read` is a failure, as it means there are
 227         // trailing JSON tokens
 228         return errors.New(`unexpected trailing data`)
 229     }
 230 
 231     // any other error, perhaps some invalid-JSON-syntax-type error
 232     return err
 233 }
 234 
 235 // checkToken handles recursion for func checkJSONL
 236 func checkToken(dec *json.Decoder, t json.Token) error {
 237     switch t := t.(type) {
 238     case json.Delim:
 239         switch t {
 240         case json.Delim('['):
 241             return checkArray(dec)
 242         case json.Delim('{'):
 243             return checkObject(dec)
 244         default:
 245             return errors.New(`unsupported JSON syntax ` + string(t))
 246         }
 247 
 248     case nil, bool, float64, json.Number, string:
 249         return nil
 250 
 251     default:
 252         // return fmt.Errorf(`unsupported token type %T`, t)
 253         return errors.New(`invalid JSON token`)
 254     }
 255 }
 256 
 257 // handleArray handles arrays for func checkToken
 258 func checkArray(dec *json.Decoder) error {
 259     for {
 260         t, err := dec.Token()
 261         if err != nil {
 262             return err
 263         }
 264 
 265         if t == json.Delim(']') {
 266             return nil
 267         }
 268 
 269         if err := checkToken(dec, t); err != nil {
 270             return err
 271         }
 272     }
 273 }
 274 
 275 // handleObject handles objects for func checkToken
 276 func checkObject(dec *json.Decoder) error {
 277     for {
 278         t, err := dec.Token()
 279         if err != nil {
 280             return err
 281         }
 282 
 283         if t == json.Delim('}') {
 284             return nil
 285         }
 286 
 287         if _, ok := t.(string); !ok {
 288             return errors.New(`expected a string for a key-value pair`)
 289         }
 290 
 291         t, err = dec.Token()
 292         if err == io.EOF || t == json.Delim('}') {
 293             return errors.New(`expected a value for a key-value pair`)
 294         }
 295 
 296         if err := checkToken(dec, t); err != nil {
 297             return err
 298         }
 299     }
 300 }