File: j0.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for j0: this version has no http(s) support. Even
  27 the unit-tests from the original j0 are omitted.
  28 
  29 To compile a smaller-sized command-line app, you can use the `go` command as
  30 follows:
  31 
  32 go build -ldflags "-s -w" -trimpath j0.go
  33 */
  34 
  35 package main
  36 
  37 import (
  38     "bufio"
  39     "bytes"
  40     "errors"
  41     "io"
  42     "os"
  43     "strconv"
  44 )
  45 
// info is the help message shown by the help options; its leading line-feed
// only keeps the raw string tidy in the source, and func main skips it by
// emitting info[1:]
const info = `
j0 [options...] [file...]


Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

All options available can either start with a single or a double-dash

    -h        show this help message
    -help     show this help message
    -jsonl    emit JSON Lines, when top-level value is an array
`
  68 
const (
    // bufSize is the capacity, in bytes, which func run gives to both its
    // buffered reader and its buffered writer
    bufSize    = 32 * 1024

    // errorStyle is the ANSI escape-sequence for red text, which func main
    // emits right before its error messages
    errorStyle = "\x1b[31m"
)
  73 
  74 func main() {
  75     args := os.Args[1:]
  76     handler := json0
  77 
  78     if len(args) > 0 {
  79         switch os.Args[1] {
  80         case `-h`, `--h`, `-help`, `--help`:
  81             os.Stderr.WriteString(info[1:])
  82             return
  83         case `-jsonl`, `--jsonl`:
  84             handler = jsonl
  85             args = args[1:]
  86         }
  87     }
  88 
  89     if len(args) > 1 {
  90         const msg = `only 1 (optional) named input is supported`
  91         os.Stderr.WriteString(errorStyle + msg + "\x1b[0m\n")
  92         os.Exit(1)
  93     }
  94 
  95     name := `-`
  96     if len(args) == 1 {
  97         name = args[0]
  98     }
  99 
 100     if err := run(os.Stdout, name, handler); isActualError(err) {
 101         os.Stderr.WriteString(errorStyle)
 102         os.Stderr.WriteString(err.Error())
 103         os.Stderr.WriteString("\x1b[0m\n")
 104         os.Exit(1)
 105     }
 106 }
 107 
 108 type handlerFunc func(w *bufio.Writer, r *bufio.Reader) error
 109 
 110 func run(w io.Writer, name string, handler handlerFunc) error {
 111     if name == `` || name == `-` {
 112         bw := bufio.NewWriterSize(w, bufSize)
 113         br := bufio.NewReaderSize(os.Stdin, bufSize)
 114         defer bw.Flush()
 115         return handler(bw, br)
 116     }
 117 
 118     f, err := os.Open(name)
 119     if err != nil {
 120         return errors.New(`can't read from file named "` + name + `"`)
 121     }
 122     defer f.Close()
 123 
 124     bw := bufio.NewWriterSize(w, bufSize)
 125     br := bufio.NewReaderSize(f, bufSize)
 126     defer bw.Flush()
 127     return handler(bw, br)
 128 }
 129 
 130 var (
 131     errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
 132     errInputEarlyEnd   = errors.New(`expected end of input data`)
 133     errInvalidComment  = errors.New(`expected / or *`)
 134     errInvalidHex      = errors.New(`expected a base-16 digit`)
 135     errInvalidToken    = errors.New(`invalid JSON token`)
 136     errNoDigits        = errors.New(`expected numeric digits`)
 137     errNoStringQuote   = errors.New(`expected " or '`)
 138     errNoArrayComma    = errors.New(`missing comma between array values`)
 139     errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
 140     errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
 141     errExtraBytes      = errors.New(`unexpected extra input bytes`)
 142 
 143     // errNoMoreOutput is a generic dummy output-error, which is meant to be
 144     // ultimately ignored, being just an excuse to quit the app immediately
 145     // and successfully
 146     errNoMoreOutput = errors.New(`no more output`)
 147 )
 148 
 149 // isActualError is to figure out whether not to ignore an error, and thus
 150 // show it as an error message
 151 func isActualError(err error) bool {
 152     return err != nil && err != io.EOF && err != errNoMoreOutput
 153 }
 154 
 155 // linePosError is a more descriptive kind of error, showing the source of
 156 // the input-related problem, as 1-based a line/pos number pair in front
 157 // of the error message
 158 type linePosError struct {
 159     // line is the 1-based line count from the input
 160     line int
 161 
 162     // pos is the 1-based `horizontal` position in its line
 163     pos int
 164 
 165     // err is the error message to `decorate` with the position info
 166     err error
 167 }
 168 
 169 // Error satisfies the error interface
 170 func (lpe linePosError) Error() string {
 171     where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
 172     return where + `: ` + lpe.err.Error()
 173 }
 174 
 175 // isIdentifier improves control-flow of func handleKey, when it handles
 176 // unquoted object keys
 177 var isIdentifier = [256]bool{
 178     '_': true,
 179 
 180     '0': true, '1': true, '2': true, '3': true, '4': true,
 181     '5': true, '6': true, '7': true, '8': true, '9': true,
 182 
 183     'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
 184     'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
 185     'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
 186     'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
 187     'Y': true, 'Z': true,
 188 
 189     'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
 190     'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
 191     'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
 192     's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
 193     'y': true, 'z': true,
 194 }
 195 
 196 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not
 197 // being 0, and normalizes letter-case for the hex letters
 198 var matchHex = [256]byte{
 199     '0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
 200     '5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
 201     'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
 202     'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
 203 }
 204 
 205 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON
 206 func json0(w *bufio.Writer, r *bufio.Reader) error {
 207     jr := jsonReader{r, 1, 1}
 208     defer w.Flush()
 209 
 210     if err := jr.handleLeadingJunk(); err != nil {
 211         return err
 212     }
 213 
 214     // handle a single top-level JSON value
 215     err := handleValue(w, &jr)
 216 
 217     // end the only output-line with a line-feed; this also avoids showing
 218     // error messages on the same line as the main output, since JSON-0
 219     // output has no line-feeds before its last byte
 220     outputByte(w, '\n')
 221 
 222     if err != nil {
 223         return err
 224     }
 225     return jr.handleTrailingJunk()
 226 }
 227 
 228 // jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines; this func
 229 // avoids writing a trailing line-feed, leaving that up to its caller
 230 func jsonl(w *bufio.Writer, r *bufio.Reader) error {
 231     jr := jsonReader{r, 1, 1}
 232 
 233     if err := jr.handleLeadingJunk(); err != nil {
 234         return err
 235     }
 236 
 237     chunk, err := jr.r.Peek(1)
 238     if err == nil && len(chunk) >= 1 && chunk[0] == '[' {
 239         return handleArrayJSONL(w, &jr)
 240     }
 241 
 242     // handle a single top-level JSON value
 243     err = handleValue(w, &jr)
 244 
 245     // end the only output-line with a line-feed; this also avoids showing
 246     // error messages on the same line as the main output, since JSON-0
 247     // output has no line-feeds before its last byte
 248     outputByte(w, '\n')
 249 
 250     if err != nil {
 251         return err
 252     }
 253     return jr.handleTrailingJunk()
 254 }
 255 
 256 // handleArrayJSONL handles top-level arrays for func jsonl
 257 func handleArrayJSONL(w *bufio.Writer, jr *jsonReader) error {
 258     if err := jr.demandSyntax('['); err != nil {
 259         return err
 260     }
 261 
 262     for n := 0; true; n++ {
 263         // there may be whitespace/comments before the next comma
 264         if err := jr.seekNext(); err != nil {
 265             return err
 266         }
 267 
 268         // handle commas between values, as well as trailing ones
 269         comma := false
 270         b, _ := jr.peekByte()
 271         if b == ',' {
 272             jr.readByte()
 273             comma = true
 274 
 275             // there may be whitespace/comments before an ending ']'
 276             if err := jr.seekNext(); err != nil {
 277                 return err
 278             }
 279             b, _ = jr.peekByte()
 280         }
 281 
 282         // handle end of array
 283         if b == ']' {
 284             jr.readByte()
 285             if n > 0 {
 286                 err := outputByte(w, '\n')
 287                 w.Flush()
 288                 return err
 289             }
 290             return nil
 291         }
 292 
 293         // turn commas between adjacent values into line-feeds, as the
 294         // output for this custom func is supposed to be JSON Lines
 295         if n > 0 {
 296             if !comma {
 297                 return errNoArrayComma
 298             }
 299             if err := outputByte(w, '\n'); err != nil {
 300                 return err
 301             }
 302             w.Flush()
 303         }
 304 
 305         // handle the next value
 306         if err := jr.seekNext(); err != nil {
 307             return err
 308         }
 309         if err := handleValue(w, jr); err != nil {
 310             return err
 311         }
 312     }
 313 
 314     // make the compiler happy
 315     return nil
 316 }
 317 
 318 // jsonReader reads data via a buffer, keeping track of the input position:
 319 // this in turn allows showing much more useful errors, when these happen
 320 type jsonReader struct {
 321     // r is the actual reader
 322     r *bufio.Reader
 323 
 324     // line is the 1-based line-counter for input bytes, and gives errors
 325     // useful position info
 326     line int
 327 
 328     // pos is the 1-based `horizontal` position in its line, and gives
 329     // errors useful position info
 330     pos int
 331 }
 332 
 333 // improveError makes any error more useful, by giving it info about the
 334 // current input-position, as a 1-based line/within-line-position pair
 335 func (jr jsonReader) improveError(err error) error {
 336     if _, ok := err.(linePosError); ok {
 337         return err
 338     }
 339 
 340     if err == io.EOF {
 341         return linePosError{jr.line, jr.pos, errInputEarlyEnd}
 342     }
 343     if err != nil {
 344         return linePosError{jr.line, jr.pos, err}
 345     }
 346     return nil
 347 }
 348 
 349 func (jr *jsonReader) handleLeadingJunk() error {
 350     // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
 351     // mark) gives no useful info if present, as UTF-8 leaves no ambiguity
 352     // about byte-order by design
 353     jr.skipUTF8BOM()
 354 
 355     // ignore leading whitespace and/or comments
 356     return jr.seekNext()
 357 }
 358 
 359 func (jr *jsonReader) handleTrailingJunk() error {
 360     // ignore trailing whitespace and/or comments
 361     if err := jr.seekNext(); err != nil {
 362         return err
 363     }
 364 
 365     // ignore trailing semicolon
 366     if b, ok := jr.peekByte(); ok && b == ';' {
 367         // ignore trailing whitespace and/or comments
 368         if err := jr.seekNext(); err != nil {
 369             return err
 370         }
 371     }
 372 
 373     // beyond trailing whitespace and/or comments, any more bytes
 374     // make the whole input data invalid JSON
 375     if _, ok := jr.peekByte(); ok {
 376         return jr.improveError(errExtraBytes)
 377     }
 378     return nil
 379 }
 380 
 381 // demandSyntax fails with an error when the next byte isn't the one given;
 382 // when it is, the byte is then read/skipped, and a nil error is returned
 383 func (jr *jsonReader) demandSyntax(syntax byte) error {
 384     chunk, err := jr.r.Peek(1)
 385     if err == io.EOF {
 386         return jr.improveError(errInputEarlyEnd)
 387     }
 388     if err != nil {
 389         return jr.improveError(err)
 390     }
 391 
 392     if len(chunk) < 1 || chunk[0] != syntax {
 393         msg := `expected ` + string(rune(syntax))
 394         return jr.improveError(errors.New(msg))
 395     }
 396 
 397     jr.readByte()
 398     return nil
 399 }
 400 
 401 // updatePosInfo does what it says, given the byte just read separately
 402 func (jr *jsonReader) updatePosInfo(b byte) {
 403     if b == '\n' {
 404         jr.line += 1
 405         jr.pos = 1
 406     } else {
 407         jr.pos++
 408     }
 409 }
 410 
 411 // peekByte simplifies control-flow for various other funcs
 412 func (jr jsonReader) peekByte() (b byte, ok bool) {
 413     chunk, err := jr.r.Peek(1)
 414     if err == nil && len(chunk) >= 1 {
 415         return chunk[0], true
 416     }
 417     return 0, false
 418 }
 419 
 420 // readByte does what it says, updating the reader's position info
 421 func (jr *jsonReader) readByte() (b byte, err error) {
 422     b, err = jr.r.ReadByte()
 423     if err == nil {
 424         jr.updatePosInfo(b)
 425         return b, nil
 426     }
 427     return b, jr.improveError(err)
 428 }
 429 
 430 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols
 431 // and comments, either single-line (starting with //) or general (starting
 432 // with /* and ending with */)
 433 func (jr *jsonReader) seekNext() error {
 434     for {
 435         b, ok := jr.peekByte()
 436         if !ok {
 437             return nil
 438         }
 439 
 440         // case ' ', '\t', '\f', '\v', '\r', '\n':
 441         if b <= 32 {
 442             // keep skipping whitespace bytes
 443             b, _ := jr.readByte()
 444             jr.updatePosInfo(b)
 445             continue
 446         }
 447 
 448         if b != '/' {
 449             // reached the next token
 450             return nil
 451         }
 452 
 453         if err := jr.skipComment(); err != nil {
 454             return err
 455         }
 456 
 457         // after comments, keep looking for more whitespace and/or comments
 458     }
 459 }
 460 
 461 // skipComment helps func seekNext skip over comments, simplifying the latter
 462 // func's control-flow
 463 func (jr *jsonReader) skipComment() error {
 464     err := jr.demandSyntax('/')
 465     if err != nil {
 466         return err
 467     }
 468 
 469     b, ok := jr.peekByte()
 470     if !ok {
 471         return jr.improveError(errInputEarlyEnd)
 472     }
 473 
 474     switch b {
 475     case '/':
 476         // handle single-line comments
 477         return jr.skipLine()
 478 
 479     case '*':
 480         // handle (potentially) multi-line comments
 481         return jr.skipGeneralComment()
 482 
 483     default:
 484         return jr.improveError(errInvalidComment)
 485     }
 486 }
 487 
 488 // skipLine handles single-line comments for func skipComment
 489 func (jr *jsonReader) skipLine() error {
 490     for {
 491         b, err := jr.r.ReadByte()
 492         if err == io.EOF {
 493             // end of input is fine in this case
 494             return nil
 495         }
 496         if err != nil {
 497             return err
 498         }
 499 
 500         jr.updatePosInfo(b)
 501         if b == '\n' {
 502             jr.line++
 503             return nil
 504         }
 505     }
 506 }
 507 
 508 // skipGeneralComment handles (potentially) multi-line comments for func
 509 // skipComment
 510 func (jr *jsonReader) skipGeneralComment() error {
 511     var prev byte
 512     for {
 513         b, err := jr.readByte()
 514         if err != nil {
 515             return jr.improveError(errCommentEarlyEnd)
 516         }
 517 
 518         if prev == '*' && b == '/' {
 519             return nil
 520         }
 521         if b == '\n' {
 522             jr.line++
 523         }
 524         prev = b
 525     }
 526 }
 527 
 528 // skipUTF8BOM does what it says, if a UTF-8 BOM is present
 529 func (jr *jsonReader) skipUTF8BOM() {
 530     lead, err := jr.r.Peek(3)
 531     if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) {
 532         jr.readByte()
 533         jr.readByte()
 534         jr.readByte()
 535         jr.pos += 3
 536     }
 537 }
 538 
 539 // outputByte is a small wrapper on func WriteByte, which adapts any error
 540 // into a custom dummy output-error, which is in turn meant to be ignored,
 541 // being just an excuse to quit the app immediately and successfully
 542 func outputByte(w *bufio.Writer, b byte) error {
 543     err := w.WriteByte(b)
 544     if err == nil {
 545         return nil
 546     }
 547     return errNoMoreOutput
 548 }
 549 
 550 // handleArray handles arrays for func handleValue
 551 func handleArray(w *bufio.Writer, jr *jsonReader) error {
 552     if err := jr.demandSyntax('['); err != nil {
 553         return err
 554     }
 555     w.WriteByte('[')
 556 
 557     for n := 0; true; n++ {
 558         // there may be whitespace/comments before the next comma
 559         if err := jr.seekNext(); err != nil {
 560             return err
 561         }
 562 
 563         // handle commas between values, as well as trailing ones
 564         comma := false
 565         b, _ := jr.peekByte()
 566         if b == ',' {
 567             jr.readByte()
 568             comma = true
 569 
 570             // there may be whitespace/comments before an ending ']'
 571             if err := jr.seekNext(); err != nil {
 572                 return err
 573             }
 574             b, _ = jr.peekByte()
 575         }
 576 
 577         // handle end of array
 578         if b == ']' {
 579             jr.readByte()
 580             w.WriteByte(']')
 581             return nil
 582         }
 583 
 584         // don't forget commas between adjacent values
 585         if n > 0 {
 586             if !comma {
 587                 return errNoArrayComma
 588             }
 589             if err := outputByte(w, ','); err != nil {
 590                 return err
 591             }
 592         }
 593 
 594         // handle the next value
 595         if err := jr.seekNext(); err != nil {
 596             return err
 597         }
 598         if err := handleValue(w, jr); err != nil {
 599             return err
 600         }
 601     }
 602 
 603     // make the compiler happy
 604     return nil
 605 }
 606 
 607 // handleDigits helps various number-handling funcs do their job
 608 func handleDigits(w *bufio.Writer, jr *jsonReader) error {
 609     for n := 0; true; n++ {
 610         b, _ := jr.peekByte()
 611 
 612         // support `nice` long numbers by ignoring their underscores
 613         if b == '_' {
 614             jr.readByte()
 615             continue
 616         }
 617 
 618         if '0' <= b && b <= '9' {
 619             jr.readByte()
 620             w.WriteByte(b)
 621             continue
 622         }
 623 
 624         if n == 0 {
 625             return errNoDigits
 626         }
 627         return nil
 628     }
 629 
 630     // make the compiler happy
 631     return nil
 632 }
 633 
 634 // handleDot handles pseudo-JSON numbers which start with a decimal dot
 635 func handleDot(w *bufio.Writer, jr *jsonReader) error {
 636     if err := jr.demandSyntax('.'); err != nil {
 637         return err
 638     }
 639     w.Write([]byte{'0', '.'})
 640     return handleDigits(w, jr)
 641 }
 642 
 643 // handleKey is used by func handleObjects and generalizes func handleString,
 644 // by allowing unquoted object keys; it's not used anywhere else, as allowing
 645 // unquoted string values is ambiguous with actual JSON-keyword values null,
 646 // false, and true.
 647 func handleKey(w *bufio.Writer, jr *jsonReader) error {
 648     quote, ok := jr.peekByte()
 649     if quote == '"' || quote == '\'' {
 650         return handleString(w, jr)
 651     }
 652     if !ok {
 653         return jr.improveError(errStringEarlyEnd)
 654     }
 655 
 656     w.WriteByte('"')
 657     for {
 658         if b, _ := jr.peekByte(); isIdentifier[b] {
 659             jr.readByte()
 660             w.WriteByte(b)
 661             continue
 662         }
 663 
 664         w.WriteByte('"')
 665         return nil
 666     }
 667 }
 668 
 669 // trySimpleInner tries to handle (more quickly) inner-strings where all bytes
 670 // are unescaped ASCII symbols: this is a very common case for strings, and is
 671 // almost always the case for object keys; returns whether it succeeded, so
 672 // this func's caller knows knows if it needs to do anything, the slower way
 673 func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
 674     chunk, _ := jr.r.Peek(64)
 675 
 676     for i, b := range chunk {
 677         if b < 32 || b > 127 || b == '\\' {
 678             return false
 679         }
 680         if b != quote {
 681             continue
 682         }
 683 
 684         // bulk-writing the chunk is this func's whole point
 685         w.WriteByte('"')
 686         w.Write(chunk[:i])
 687         w.WriteByte('"')
 688 
 689         jr.r.Discard(i + 1)
 690         return true
 691     }
 692 
 693     // maybe the inner-string is ok, but it's just longer than the chunk
 694     return false
 695 }
 696 
 697 // handleKeyword is used by funcs handleFalse, handleNull, and handleTrue
 698 func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error {
 699     for rest := kw; len(rest) > 0; rest = rest[1:] {
 700         b, err := jr.readByte()
 701         if err == nil && b == rest[0] {
 702             // keywords given to this func have no line-feeds
 703             jr.pos++
 704             continue
 705         }
 706 
 707         msg := `expected JSON value ` + string(kw)
 708         return jr.improveError(errors.New(msg))
 709     }
 710 
 711     w.Write(kw)
 712     return nil
 713 }
 714 
 715 // handleNegative handles numbers starting with a negative sign for func
 716 // handleValue
 717 func handleNegative(w *bufio.Writer, jr *jsonReader) error {
 718     if err := jr.demandSyntax('-'); err != nil {
 719         return err
 720     }
 721 
 722     w.WriteByte('-')
 723     if b, _ := jr.peekByte(); b == '.' {
 724         jr.readByte()
 725         w.Write([]byte{'0', '.'})
 726         return handleDigits(w, jr)
 727     }
 728     return handleNumber(w, jr)
 729 }
 730 
 731 // handleNumber handles numeric values/tokens, including invalid-JSON cases,
 732 // such as values starting with a decimal dot
 733 func handleNumber(w *bufio.Writer, jr *jsonReader) error {
 734     // handle integer digits
 735     if err := handleDigits(w, jr); err != nil {
 736         return err
 737     }
 738 
 739     // handle optional decimal digits, starting with a leading dot
 740     if b, _ := jr.peekByte(); b == '.' {
 741         jr.readByte()
 742         w.WriteByte('.')
 743         return handleDigits(w, jr)
 744     }
 745 
 746     // handle optional exponent digits
 747     if b, _ := jr.peekByte(); b == 'e' || b == 'E' {
 748         jr.readByte()
 749         w.WriteByte(b)
 750         b, _ = jr.peekByte()
 751         if b == '+' {
 752             jr.readByte()
 753         } else if b == '-' {
 754             w.WriteByte('-')
 755             jr.readByte()
 756         }
 757         return handleDigits(w, jr)
 758     }
 759 
 760     return nil
 761 }
 762 
 763 // handleObject handles objects for func handleValue
 764 func handleObject(w *bufio.Writer, jr *jsonReader) error {
 765     if err := jr.demandSyntax('{'); err != nil {
 766         return err
 767     }
 768     w.WriteByte('{')
 769 
 770     for npairs := 0; true; npairs++ {
 771         // there may be whitespace/comments before the next comma
 772         if err := jr.seekNext(); err != nil {
 773             return err
 774         }
 775 
 776         // handle commas between key-value pairs, as well as trailing ones
 777         comma := false
 778         b, _ := jr.peekByte()
 779         if b == ',' {
 780             jr.readByte()
 781             comma = true
 782 
 783             // there may be whitespace/comments before an ending '}'
 784             if err := jr.seekNext(); err != nil {
 785                 return err
 786             }
 787             b, _ = jr.peekByte()
 788         }
 789 
 790         // handle end of object
 791         if b == '}' {
 792             jr.readByte()
 793             w.WriteByte('}')
 794             return nil
 795         }
 796 
 797         // don't forget commas between adjacent key-value pairs
 798         if npairs > 0 {
 799             if !comma {
 800                 return errNoObjectComma
 801             }
 802             if err := outputByte(w, ','); err != nil {
 803                 return err
 804             }
 805         }
 806 
 807         // handle the next pair's key
 808         if err := jr.seekNext(); err != nil {
 809             return err
 810         }
 811         if err := handleKey(w, jr); err != nil {
 812             return err
 813         }
 814 
 815         // demand a colon right after the key
 816         if err := jr.seekNext(); err != nil {
 817             return err
 818         }
 819         if err := jr.demandSyntax(':'); err != nil {
 820             return err
 821         }
 822         w.WriteByte(':')
 823 
 824         // handle the next pair's value
 825         if err := jr.seekNext(); err != nil {
 826             return err
 827         }
 828         if err := handleValue(w, jr); err != nil {
 829             return err
 830         }
 831     }
 832 
 833     // make the compiler happy
 834     return nil
 835 }
 836 
 837 // handlePositive handles numbers starting with a positive sign for func
 838 // handleValue
 839 func handlePositive(w *bufio.Writer, jr *jsonReader) error {
 840     if err := jr.demandSyntax('+'); err != nil {
 841         return err
 842     }
 843 
 844     // valid JSON isn't supposed to have leading pluses on numbers, so
 845     // emit nothing for it, unlike for negative numbers
 846 
 847     if b, _ := jr.peekByte(); b == '.' {
 848         jr.readByte()
 849         w.Write([]byte{'0', '.'})
 850         return handleDigits(w, jr)
 851     }
 852     return handleNumber(w, jr)
 853 }
 854 
 855 // handleString handles strings for funcs handleValue and handleObject, and
 856 // supports both single-quotes and double-quotes, always emitting the latter
 857 // in the output, of course
 858 func handleString(w *bufio.Writer, jr *jsonReader) error {
 859     quote, ok := jr.peekByte()
 860     if !ok || (quote != '"' && quote != '\'') {
 861         return errNoStringQuote
 862     }
 863 
 864     jr.readByte()
 865     // try the quicker all-unescaped-ASCII handler
 866     if trySimpleInner(w, jr, quote) {
 867         return nil
 868     }
 869 
 870     // it's a non-trivial inner-string, so handle it byte-by-byte
 871     w.WriteByte('"')
 872     escaped := false
 873 
 874     for {
 875         b, err := jr.r.ReadByte()
 876         if err != nil {
 877             if err == io.EOF {
 878                 return jr.improveError(errStringEarlyEnd)
 879             }
 880             return jr.improveError(err)
 881         }
 882 
 883         if !escaped {
 884             if b == '\\' {
 885                 escaped = true
 886                 continue
 887             }
 888 
 889             // handle end of string
 890             if b == quote {
 891                 return outputByte(w, '"')
 892             }
 893 
 894             w.Write(escapedStringBytes[b])
 895             jr.updatePosInfo(b)
 896             continue
 897         }
 898 
 899         // handle escaped items
 900         escaped = false
 901 
 902         switch b {
 903         case 'u':
 904             // \u needs exactly 4 hex-digits to follow it
 905             w.Write([]byte{'\\', 'u'})
 906             if err := copyHex(w, 4, jr); err != nil {
 907                 return jr.improveError(err)
 908             }
 909 
 910         case 'x':
 911             // JSON only supports 4 escaped hex-digits, so pad the 2
 912             // expected hex-digits with 2 zeros
 913             w.Write([]byte{'\\', 'u', '0', '0'})
 914             if err := copyHex(w, 2, jr); err != nil {
 915                 return jr.improveError(err)
 916             }
 917 
 918         case 't', 'f', 'r', 'n', 'b', '\\', '"':
 919             // handle valid-JSON escaped string sequences
 920             w.WriteByte('\\')
 921             w.WriteByte(b)
 922 
 923         // case '\'':
 924         //  // escaped single-quotes aren't standard JSON, but they can
 925         //  // be handy when the input uses non-standard single-quoted
 926         //  // strings
 927         //  w.WriteByte('\'')
 928 
 929         default:
 930             // return jr.decorateError(unexpectedByte{b})
 931             w.Write(escapedStringBytes[b])
 932         }
 933     }
 934 }
 935 
 936 // copyHex handles a run of hex-digits for func handleString, starting right
 937 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its
 938 // errors with position info: that's up to the caller
 939 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
 940     for i := 0; i < n; i++ {
 941         b, err := jr.r.ReadByte()
 942         if err == io.EOF {
 943             return errStringEarlyEnd
 944         }
 945         if err != nil {
 946             return err
 947         }
 948 
 949         jr.updatePosInfo(b)
 950 
 951         if b := matchHex[b]; b != 0 {
 952             w.WriteByte(b)
 953             continue
 954         }
 955 
 956         return errInvalidHex
 957     }
 958 
 959     return nil
 960 }
 961 
 962 // handleValue is a generic JSON-token handler, which allows the recursive
 963 // behavior to handle any kind of JSON/pseudo-JSON input
 964 func handleValue(w *bufio.Writer, jr *jsonReader) error {
 965     chunk, err := jr.r.Peek(1)
 966     if err == nil && len(chunk) >= 1 {
 967         return handleValueDispatch(w, jr, chunk[0])
 968     }
 969 
 970     if err == io.EOF {
 971         return jr.improveError(errInputEarlyEnd)
 972     }
 973     return jr.improveError(errInputEarlyEnd)
 974 }
 975 
 976 // handleValueDispatch simplifies control-flow for func handleValue
 977 func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
 978     switch b {
 979     case 'f':
 980         return handleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'})
 981     case 'n':
 982         return handleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'})
 983     case 't':
 984         return handleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'})
 985     case '.':
 986         return handleDot(w, jr)
 987     case '+':
 988         return handlePositive(w, jr)
 989     case '-':
 990         return handleNegative(w, jr)
 991     case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 992         return handleNumber(w, jr)
 993     case '\'', '"':
 994         return handleString(w, jr)
 995     case '[':
 996         return handleArray(w, jr)
 997     case '{':
 998         return handleObject(w, jr)
 999     default:
1000         return jr.improveError(errInvalidToken)
1001     }
1002 }
1003 
// escapedStringBytes maps each possible byte to the bytes to emit for it
// inside a JSON string, so func handleString can treat all string bytes
// with a quick table lookup:
//
//   - backspace, tab, line-feed, form-feed, and carriage-return turn into
//     their 2-byte escapes
//   - all other bytes below 32 turn into 6-byte `\u00xx` escapes, using
//     lower-case hex digits
//   - double-quotes and backslashes are backslash-escaped
//   - all other bytes stand for themselves
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = func() [256][]byte {
    const hexDigits = "0123456789abcdef"

    var table [256][]byte
    for i := range table {
        c := byte(i)
        if c < 0x20 {
            hi, lo := hexDigits[c>>4], hexDigits[c&0x0f]
            table[i] = []byte{'\\', 'u', '0', '0', hi, lo}
            continue
        }
        table[i] = []byte{c}
    }

    // bytes with dedicated 2-byte escapes override the generic entries
    table['\b'] = []byte{'\\', 'b'}
    table['\t'] = []byte{'\\', 't'}
    table['\n'] = []byte{'\\', 'n'}
    table['\f'] = []byte{'\\', 'f'}
    table['\r'] = []byte{'\\', 'r'}
    table['"'] = []byte{'\\', '"'}
    table['\\'] = []byte{'\\', '\\'}

    return table
}()