File: j0.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for j0: this version has no http(s) support. Even
  27 the unit-tests from the original j0 are omitted.
  28 
  29 To compile a smaller-sized command-line app, you can use the `go` command as
  30 follows:
  31 
  32 go build -ldflags "-s -w" -trimpath j0.go
  33 */
  34 
  35 package main
  36 
  37 import (
  38     "bufio"
  39     "bytes"
  40     "errors"
  41     "io"
  42     "os"
  43     "strconv"
  44     "unicode"
  45 )
  46 
// info is the help message shown for the -h/-help options; it starts with
// a line-feed to keep the text below tidy, which func main skips when it
// writes the message out
const info = `
j0 [options...] [file...]


Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

All options available can either start with a single or a double-dash

    -h        show this help message
    -help     show this help message
    -jsonl    emit JSON Lines, when top-level value is an array
`
  69 
const (
    // bufSize is the size, in bytes, of the buffers func run wraps around
    // both the input and the output
    bufSize    = 32 * 1024

    // errorStyle is the ANSI escape sequence written right before error
    // messages on stderr; messages are followed by the reset sequence
    errorStyle = "\x1b[31m"
)
  74 
  75 func main() {
  76     args := os.Args[1:]
  77     handler := json0
  78 
  79     if len(args) > 0 {
  80         switch os.Args[1] {
  81         case `-h`, `--h`, `-help`, `--help`:
  82             os.Stderr.WriteString(info[1:])
  83             return
  84         case `-jsonl`, `--jsonl`:
  85             handler = jsonl
  86             args = args[1:]
  87         }
  88     }
  89 
  90     if len(args) > 1 {
  91         const msg = `only 1 (optional) named input is supported`
  92         os.Stderr.WriteString(errorStyle + msg + "\x1b[0m\n")
  93         os.Exit(1)
  94     }
  95 
  96     name := `-`
  97     if len(args) == 1 {
  98         name = args[0]
  99     }
 100 
 101     if err := run(os.Stdout, name, handler); isActualError(err) {
 102         os.Stderr.WriteString(errorStyle)
 103         os.Stderr.WriteString(err.Error())
 104         os.Stderr.WriteString("\x1b[0m\n")
 105         os.Exit(1)
 106     }
 107 }
 108 
// handlerFunc is the signature shared by the converters func main can pick
// from (json0 and jsonl): each reads its input from r and writes its output
// to w
type handlerFunc func(w *bufio.Writer, r *bufio.Reader) error
 110 
 111 func run(w io.Writer, name string, handler handlerFunc) error {
 112     if name == `` || name == `-` {
 113         bw := bufio.NewWriterSize(w, bufSize)
 114         br := bufio.NewReaderSize(os.Stdin, bufSize)
 115         defer bw.Flush()
 116         return handler(bw, br)
 117     }
 118 
 119     f, err := os.Open(name)
 120     if err != nil {
 121         return errors.New(`can't read from file named "` + name + `"`)
 122     }
 123     defer f.Close()
 124 
 125     bw := bufio.NewWriterSize(w, bufSize)
 126     br := bufio.NewReaderSize(f, bufSize)
 127     defer bw.Flush()
 128     return handler(bw, br)
 129 }
 130 
 131 var (
 132     errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
 133     errInputEarlyEnd   = errors.New(`expected end of input data`)
 134     errInvalidComment  = errors.New(`expected / or *`)
 135     errInvalidHex      = errors.New(`expected a base-16 digit`)
 136     errInvalidRune     = errors.New(`invalid UTF-8 bytes`)
 137     errInvalidToken    = errors.New(`invalid JSON token`)
 138     errNoDigits        = errors.New(`expected numeric digits`)
 139     errNoStringQuote   = errors.New(`expected " or '`)
 140     errNoArrayComma    = errors.New(`missing comma between array values`)
 141     errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
 142     errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
 143     errExtraBytes      = errors.New(`unexpected extra input bytes`)
 144 
 145     // errNoMoreOutput is a generic dummy output-error, which is meant to be
 146     // ultimately ignored, being just an excuse to quit the app immediately
 147     // and successfully
 148     errNoMoreOutput = errors.New(`no more output`)
 149 )
 150 
 151 // isActualError is to figure out whether not to ignore an error, and thus
 152 // show it as an error message
 153 func isActualError(err error) bool {
 154     return err != nil && err != io.EOF && err != errNoMoreOutput
 155 }
 156 
 157 // linePosError is a more descriptive kind of error, showing the source of
 158 // the input-related problem, as 1-based a line/pos number pair in front
 159 // of the error message
 160 type linePosError struct {
 161     // line is the 1-based line count from the input
 162     line int
 163 
 164     // pos is the 1-based `horizontal` position in its line
 165     pos int
 166 
 167     // err is the error message to `decorate` with the position info
 168     err error
 169 }
 170 
 171 // Error satisfies the error interface
 172 func (lpe linePosError) Error() string {
 173     where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
 174     return where + `: ` + lpe.err.Error()
 175 }
 176 
 177 // isIdentifier improves control-flow of func handleKey, when it handles
 178 // unquoted object keys
 179 var isIdentifier = [256]bool{
 180     '_': true,
 181 
 182     '0': true, '1': true, '2': true, '3': true, '4': true,
 183     '5': true, '6': true, '7': true, '8': true, '9': true,
 184 
 185     'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
 186     'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
 187     'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
 188     'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
 189     'Y': true, 'Z': true,
 190 
 191     'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
 192     'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
 193     'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
 194     's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
 195     'y': true, 'z': true,
 196 }
 197 
 198 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not
 199 // being 0, and normalizes letter-case for the hex letters
 200 var matchHex = [256]byte{
 201     '0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
 202     '5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
 203     'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
 204     'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
 205 }
 206 
 207 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON
 208 func json0(w *bufio.Writer, r *bufio.Reader) error {
 209     jr := jsonReader{r, 1, 1}
 210     defer w.Flush()
 211 
 212     if err := jr.handleLeadingJunk(); err != nil {
 213         return err
 214     }
 215 
 216     // handle a single top-level JSON value
 217     err := handleValue(w, &jr)
 218 
 219     // end the only output-line with a line-feed; this also avoids showing
 220     // error messages on the same line as the main output, since JSON-0
 221     // output has no line-feeds before its last byte
 222     outputByte(w, '\n')
 223 
 224     if err != nil {
 225         return err
 226     }
 227     return jr.handleTrailingJunk()
 228 }
 229 
 230 // jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines; this func
 231 // avoids writing a trailing line-feed, leaving that up to its caller
 232 func jsonl(w *bufio.Writer, r *bufio.Reader) error {
 233     jr := jsonReader{r, 1, 1}
 234 
 235     if err := jr.handleLeadingJunk(); err != nil {
 236         return err
 237     }
 238 
 239     chunk, err := jr.r.Peek(1)
 240     if err == nil && len(chunk) >= 1 && chunk[0] == '[' {
 241         return handleArrayJSONL(w, &jr)
 242     }
 243 
 244     // handle a single top-level JSON value
 245     err = handleValue(w, &jr)
 246 
 247     // end the only output-line with a line-feed; this also avoids showing
 248     // error messages on the same line as the main output, since JSON-0
 249     // output has no line-feeds before its last byte
 250     outputByte(w, '\n')
 251 
 252     if err != nil {
 253         return err
 254     }
 255     return jr.handleTrailingJunk()
 256 }
 257 
 258 // handleArrayJSONL handles top-level arrays for func jsonl
 259 func handleArrayJSONL(w *bufio.Writer, jr *jsonReader) error {
 260     if err := jr.demandSyntax('['); err != nil {
 261         return err
 262     }
 263 
 264     for n := 0; true; n++ {
 265         // there may be whitespace/comments before the next comma
 266         if err := jr.seekNext(); err != nil {
 267             return err
 268         }
 269 
 270         // handle commas between values, as well as trailing ones
 271         comma := false
 272         b, _ := jr.peekByte()
 273         if b == ',' {
 274             jr.readByte()
 275             comma = true
 276 
 277             // there may be whitespace/comments before an ending ']'
 278             if err := jr.seekNext(); err != nil {
 279                 return err
 280             }
 281             b, _ = jr.peekByte()
 282         }
 283 
 284         // handle end of array
 285         if b == ']' {
 286             jr.readByte()
 287             if n > 0 {
 288                 err := outputByte(w, '\n')
 289                 w.Flush()
 290                 return err
 291             }
 292             return nil
 293         }
 294 
 295         // turn commas between adjacent values into line-feeds, as the
 296         // output for this custom func is supposed to be JSON Lines
 297         if n > 0 {
 298             if !comma {
 299                 return errNoArrayComma
 300             }
 301             if err := outputByte(w, '\n'); err != nil {
 302                 return err
 303             }
 304             w.Flush()
 305         }
 306 
 307         // handle the next value
 308         if err := jr.seekNext(); err != nil {
 309             return err
 310         }
 311         if err := handleValue(w, jr); err != nil {
 312             return err
 313         }
 314     }
 315 
 316     // make the compiler happy
 317     return nil
 318 }
 319 
// jsonReader reads data via a buffer, keeping track of the input position:
// this in turn allows showing much more useful errors, when these happen;
// other funcs in this file create values of this type positionally, with
// both line and pos starting at 1
type jsonReader struct {
    // r is the actual reader
    r *bufio.Reader

    // line is the 1-based line-counter for input bytes, and gives errors
    // useful position info
    line int

    // pos is the 1-based `horizontal` position in its line, and gives
    // errors useful position info
    pos int
}
 334 
 335 // improveError makes any error more useful, by giving it info about the
 336 // current input-position, as a 1-based line/within-line-position pair
 337 func (jr jsonReader) improveError(err error) error {
 338     if _, ok := err.(linePosError); ok {
 339         return err
 340     }
 341 
 342     if err == io.EOF {
 343         return linePosError{jr.line, jr.pos, errInputEarlyEnd}
 344     }
 345     if err != nil {
 346         return linePosError{jr.line, jr.pos, err}
 347     }
 348     return nil
 349 }
 350 
 351 func (jr *jsonReader) handleLeadingJunk() error {
 352     // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
 353     // mark) gives no useful info if present, as UTF-8 leaves no ambiguity
 354     // about byte-order by design
 355     jr.skipUTF8BOM()
 356 
 357     // ignore leading whitespace and/or comments
 358     return jr.seekNext()
 359 }
 360 
 361 func (jr *jsonReader) handleTrailingJunk() error {
 362     // ignore trailing whitespace and/or comments
 363     if err := jr.seekNext(); err != nil {
 364         return err
 365     }
 366 
 367     // ignore trailing semicolon
 368     if b, ok := jr.peekByte(); ok && b == ';' {
 369         jr.readByte()
 370         // ignore trailing whitespace and/or comments
 371         if err := jr.seekNext(); err != nil {
 372             return err
 373         }
 374     }
 375 
 376     // beyond trailing whitespace and/or comments, any more bytes
 377     // make the whole input data invalid JSON
 378     if _, ok := jr.peekByte(); ok {
 379         return jr.improveError(errExtraBytes)
 380     }
 381     return nil
 382 }
 383 
 384 // demandSyntax fails with an error when the next byte isn't the one given;
 385 // when it is, the byte is then read/skipped, and a nil error is returned
 386 func (jr *jsonReader) demandSyntax(syntax byte) error {
 387     chunk, err := jr.r.Peek(1)
 388     if err == io.EOF {
 389         return jr.improveError(errInputEarlyEnd)
 390     }
 391     if err != nil {
 392         return jr.improveError(err)
 393     }
 394 
 395     if len(chunk) < 1 || chunk[0] != syntax {
 396         msg := `expected ` + string(rune(syntax))
 397         return jr.improveError(errors.New(msg))
 398     }
 399 
 400     jr.readByte()
 401     return nil
 402 }
 403 
 404 // updatePosInfo does what it says, given the byte just read separately
 405 func (jr *jsonReader) updatePosInfo(r rune) {
 406     if r == '\n' {
 407         jr.line += 1
 408         jr.pos = 1
 409     } else {
 410         jr.pos++
 411     }
 412 }
 413 
 414 // peekByte simplifies control-flow for various other funcs
 415 func (jr jsonReader) peekByte() (b byte, ok bool) {
 416     chunk, err := jr.r.Peek(1)
 417     if err == nil && len(chunk) >= 1 {
 418         return chunk[0], true
 419     }
 420     return 0, false
 421 }
 422 
 423 // readByte does what it says, updating the reader's position info
 424 func (jr *jsonReader) readByte() (b byte, err error) {
 425     b, err = jr.r.ReadByte()
 426     if err == nil {
 427         jr.updatePosInfo(rune(b))
 428         return b, nil
 429     }
 430     return b, jr.improveError(err)
 431 }
 432 
 433 // readRune does what it says, updating the reader's position info
 434 func (jr *jsonReader) readRune() (r rune, err error) {
 435     r, _, err = jr.r.ReadRune()
 436     if err == nil {
 437         jr.updatePosInfo(r)
 438         return r, nil
 439     }
 440     return r, jr.improveError(err)
 441 }
 442 
 443 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols
 444 // and comments, either single-line (starting with //) or general (starting
 445 // with /* and ending with */)
 446 func (jr *jsonReader) seekNext() error {
 447     for {
 448         b, ok := jr.peekByte()
 449         if !ok {
 450             return nil
 451         }
 452 
 453         // case ' ', '\t', '\f', '\v', '\r', '\n':
 454         if b <= 32 {
 455             // keep skipping whitespace bytes
 456             jr.readByte()
 457             continue
 458         }
 459 
 460         if b != '/' {
 461             // reached the next token
 462             return nil
 463         }
 464 
 465         if err := jr.skipComment(); err != nil {
 466             return err
 467         }
 468 
 469         // after comments, keep looking for more whitespace and/or comments
 470     }
 471 }
 472 
 473 // skipComment helps func seekNext skip over comments, simplifying the latter
 474 // func's control-flow
 475 func (jr *jsonReader) skipComment() error {
 476     err := jr.demandSyntax('/')
 477     if err != nil {
 478         return err
 479     }
 480 
 481     b, ok := jr.peekByte()
 482     if !ok {
 483         return jr.improveError(errInputEarlyEnd)
 484     }
 485 
 486     switch b {
 487     case '/':
 488         // handle single-line comments
 489         return jr.skipLine()
 490 
 491     case '*':
 492         // handle (potentially) multi-line comments
 493         return jr.skipGeneralComment()
 494 
 495     default:
 496         return jr.improveError(errInvalidComment)
 497     }
 498 }
 499 
 500 // skipLine handles single-line comments for func skipComment
 501 func (jr *jsonReader) skipLine() error {
 502     for {
 503         b, err := jr.readByte()
 504         if err == io.EOF {
 505             // end of input is fine in this case
 506             return nil
 507         }
 508         if err != nil {
 509             return err
 510         }
 511 
 512         if b == '\n' {
 513             return nil
 514         }
 515     }
 516 }
 517 
 518 // skipGeneralComment handles (potentially) multi-line comments for func
 519 // skipComment
 520 func (jr *jsonReader) skipGeneralComment() error {
 521     var prev byte
 522     for {
 523         b, err := jr.readByte()
 524         if err != nil {
 525             return jr.improveError(errCommentEarlyEnd)
 526         }
 527 
 528         if prev == '*' && b == '/' {
 529             return nil
 530         }
 531         if b == '\n' {
 532             jr.line++
 533         }
 534         prev = b
 535     }
 536 }
 537 
 538 // skipUTF8BOM does what it says, if a UTF-8 BOM is present
 539 func (jr *jsonReader) skipUTF8BOM() {
 540     lead, err := jr.r.Peek(3)
 541     if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) {
 542         jr.readByte()
 543         jr.readByte()
 544         jr.readByte()
 545         jr.pos += 3
 546     }
 547 }
 548 
 549 // outputByte is a small wrapper on func WriteByte, which adapts any error
 550 // into a custom dummy output-error, which is in turn meant to be ignored,
 551 // being just an excuse to quit the app immediately and successfully
 552 func outputByte(w *bufio.Writer, b byte) error {
 553     err := w.WriteByte(b)
 554     if err == nil {
 555         return nil
 556     }
 557     return errNoMoreOutput
 558 }
 559 
 560 // handleArray handles arrays for func handleValue
 561 func handleArray(w *bufio.Writer, jr *jsonReader) error {
 562     if err := jr.demandSyntax('['); err != nil {
 563         return err
 564     }
 565     w.WriteByte('[')
 566 
 567     for n := 0; true; n++ {
 568         // there may be whitespace/comments before the next comma
 569         if err := jr.seekNext(); err != nil {
 570             return err
 571         }
 572 
 573         // handle commas between values, as well as trailing ones
 574         comma := false
 575         b, _ := jr.peekByte()
 576         if b == ',' {
 577             jr.readByte()
 578             comma = true
 579 
 580             // there may be whitespace/comments before an ending ']'
 581             if err := jr.seekNext(); err != nil {
 582                 return err
 583             }
 584             b, _ = jr.peekByte()
 585         }
 586 
 587         // handle end of array
 588         if b == ']' {
 589             jr.readByte()
 590             w.WriteByte(']')
 591             return nil
 592         }
 593 
 594         // don't forget commas between adjacent values
 595         if n > 0 {
 596             if !comma {
 597                 return errNoArrayComma
 598             }
 599             if err := outputByte(w, ','); err != nil {
 600                 return err
 601             }
 602         }
 603 
 604         // handle the next value
 605         if err := jr.seekNext(); err != nil {
 606             return err
 607         }
 608         if err := handleValue(w, jr); err != nil {
 609             return err
 610         }
 611     }
 612 
 613     // make the compiler happy
 614     return nil
 615 }
 616 
 617 // handleDigits helps various number-handling funcs do their job
 618 func handleDigits(w *bufio.Writer, jr *jsonReader) error {
 619     for n := 0; true; n++ {
 620         b, _ := jr.peekByte()
 621 
 622         // support `nice` long numbers by ignoring their underscores
 623         if b == '_' {
 624             jr.readByte()
 625             continue
 626         }
 627 
 628         if '0' <= b && b <= '9' {
 629             jr.readByte()
 630             w.WriteByte(b)
 631             continue
 632         }
 633 
 634         if n == 0 {
 635             return errNoDigits
 636         }
 637         return nil
 638     }
 639 
 640     // make the compiler happy
 641     return nil
 642 }
 643 
 644 // handleDot handles pseudo-JSON numbers which start with a decimal dot
 645 func handleDot(w *bufio.Writer, jr *jsonReader) error {
 646     if err := jr.demandSyntax('.'); err != nil {
 647         return err
 648     }
 649     w.Write([]byte{'0', '.'})
 650     return handleDigits(w, jr)
 651 }
 652 
 653 // handleKey is used by func handleObjects and generalizes func handleString,
 654 // by allowing unquoted object keys; it's not used anywhere else, as allowing
 655 // unquoted string values is ambiguous with actual JSON-keyword values null,
 656 // false, and true.
 657 func handleKey(w *bufio.Writer, jr *jsonReader) error {
 658     quote, ok := jr.peekByte()
 659     if !ok {
 660         return jr.improveError(errStringEarlyEnd)
 661     }
 662 
 663     if quote == '"' || quote == '\'' {
 664         return handleString(w, jr)
 665     }
 666 
 667     w.WriteByte('"')
 668     for {
 669         if b, _ := jr.peekByte(); isIdentifier[b] {
 670             jr.readByte()
 671             w.WriteByte(b)
 672             continue
 673         }
 674 
 675         w.WriteByte('"')
 676         return nil
 677     }
 678 }
 679 
 680 // trySimpleInner tries to handle (more quickly) inner-strings where all bytes
 681 // are unescaped ASCII symbols: this is a very common case for strings, and is
 682 // almost always the case for object keys; returns whether it succeeded, so
 683 // this func's caller knows knows if it needs to do anything, the slower way
 684 func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
 685     chunk, _ := jr.r.Peek(64)
 686 
 687     for i, b := range chunk {
 688         if b < 32 || b > 127 || b == '\\' {
 689             return false
 690         }
 691         if b != quote {
 692             continue
 693         }
 694 
 695         // bulk-writing the chunk is this func's whole point
 696         w.WriteByte('"')
 697         w.Write(chunk[:i])
 698         w.WriteByte('"')
 699 
 700         jr.r.Discard(i + 1)
 701         return true
 702     }
 703 
 704     // maybe the inner-string is ok, but it's just longer than the chunk
 705     return false
 706 }
 707 
 708 // handleKeyword is used by funcs handleFalse, handleNull, and handleTrue
 709 func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error {
 710     for rest := kw; len(rest) > 0; rest = rest[1:] {
 711         b, err := jr.readByte()
 712         if err == nil && b == rest[0] {
 713             // keywords given to this func have no line-feeds
 714             jr.pos++
 715             continue
 716         }
 717 
 718         msg := `expected JSON value ` + string(kw)
 719         return jr.improveError(errors.New(msg))
 720     }
 721 
 722     w.Write(kw)
 723     return nil
 724 }
 725 
 726 // handleNegative handles numbers starting with a negative sign for func
 727 // handleValue
 728 func handleNegative(w *bufio.Writer, jr *jsonReader) error {
 729     if err := jr.demandSyntax('-'); err != nil {
 730         return err
 731     }
 732 
 733     w.WriteByte('-')
 734     if b, _ := jr.peekByte(); b == '.' {
 735         jr.readByte()
 736         w.Write([]byte{'0', '.'})
 737         return handleDigits(w, jr)
 738     }
 739     return handleNumber(w, jr)
 740 }
 741 
 742 // handleNumber handles numeric values/tokens, including invalid-JSON cases,
 743 // such as values starting with a decimal dot
 744 func handleNumber(w *bufio.Writer, jr *jsonReader) error {
 745     // handle integer digits
 746     if err := handleDigits(w, jr); err != nil {
 747         return err
 748     }
 749 
 750     // handle optional decimal digits, starting with a leading dot
 751     if b, _ := jr.peekByte(); b == '.' {
 752         jr.readByte()
 753         w.WriteByte('.')
 754         return handleDigits(w, jr)
 755     }
 756 
 757     // handle optional exponent digits
 758     if b, _ := jr.peekByte(); b == 'e' || b == 'E' {
 759         jr.readByte()
 760         w.WriteByte(b)
 761         b, _ = jr.peekByte()
 762         if b == '+' {
 763             jr.readByte()
 764         } else if b == '-' {
 765             w.WriteByte('-')
 766             jr.readByte()
 767         }
 768         return handleDigits(w, jr)
 769     }
 770 
 771     return nil
 772 }
 773 
 774 // handleObject handles objects for func handleValue
 775 func handleObject(w *bufio.Writer, jr *jsonReader) error {
 776     if err := jr.demandSyntax('{'); err != nil {
 777         return err
 778     }
 779     w.WriteByte('{')
 780 
 781     for npairs := 0; true; npairs++ {
 782         // there may be whitespace/comments before the next comma
 783         if err := jr.seekNext(); err != nil {
 784             return err
 785         }
 786 
 787         // handle commas between key-value pairs, as well as trailing ones
 788         comma := false
 789         b, _ := jr.peekByte()
 790         if b == ',' {
 791             jr.readByte()
 792             comma = true
 793 
 794             // there may be whitespace/comments before an ending '}'
 795             if err := jr.seekNext(); err != nil {
 796                 return err
 797             }
 798             b, _ = jr.peekByte()
 799         }
 800 
 801         // handle end of object
 802         if b == '}' {
 803             jr.readByte()
 804             w.WriteByte('}')
 805             return nil
 806         }
 807 
 808         // don't forget commas between adjacent key-value pairs
 809         if npairs > 0 {
 810             if !comma {
 811                 return errNoObjectComma
 812             }
 813             if err := outputByte(w, ','); err != nil {
 814                 return err
 815             }
 816         }
 817 
 818         // handle the next pair's key
 819         if err := jr.seekNext(); err != nil {
 820             return err
 821         }
 822         if err := handleKey(w, jr); err != nil {
 823             return err
 824         }
 825 
 826         // demand a colon right after the key
 827         if err := jr.seekNext(); err != nil {
 828             return err
 829         }
 830         if err := jr.demandSyntax(':'); err != nil {
 831             return err
 832         }
 833         w.WriteByte(':')
 834 
 835         // handle the next pair's value
 836         if err := jr.seekNext(); err != nil {
 837             return err
 838         }
 839         if err := handleValue(w, jr); err != nil {
 840             return err
 841         }
 842     }
 843 
 844     // make the compiler happy
 845     return nil
 846 }
 847 
 848 // handlePositive handles numbers starting with a positive sign for func
 849 // handleValue
 850 func handlePositive(w *bufio.Writer, jr *jsonReader) error {
 851     if err := jr.demandSyntax('+'); err != nil {
 852         return err
 853     }
 854 
 855     // valid JSON isn't supposed to have leading pluses on numbers, so
 856     // emit nothing for it, unlike for negative numbers
 857 
 858     if b, _ := jr.peekByte(); b == '.' {
 859         jr.readByte()
 860         w.Write([]byte{'0', '.'})
 861         return handleDigits(w, jr)
 862     }
 863     return handleNumber(w, jr)
 864 }
 865 
 866 // handleString handles strings for funcs handleValue and handleObject, and
 867 // supports both single-quotes and double-quotes, always emitting the latter
 868 // in the output, of course
 869 func handleString(w *bufio.Writer, jr *jsonReader) error {
 870     q, ok := jr.peekByte()
 871     if !ok || (q != '"' && q != '\'') {
 872         return errNoStringQuote
 873     }
 874 
 875     jr.readByte()
 876     quote := rune(q)
 877 
 878     // try the quicker all-unescaped-ASCII handler
 879     if trySimpleInner(w, jr, q) {
 880         return nil
 881     }
 882 
 883     // it's a non-trivial inner-string, so handle it byte-by-byte
 884     w.WriteByte('"')
 885     escaped := false
 886 
 887     for {
 888         r, err := jr.readRune()
 889         if r == unicode.ReplacementChar {
 890             return jr.improveError(errInvalidRune)
 891         }
 892         if err != nil {
 893             if err == io.EOF {
 894                 return jr.improveError(errStringEarlyEnd)
 895             }
 896             return jr.improveError(err)
 897         }
 898 
 899         if !escaped {
 900             if r == '\\' {
 901                 escaped = true
 902                 continue
 903             }
 904 
 905             // handle end of string
 906             if r == quote {
 907                 return outputByte(w, '"')
 908             }
 909 
 910             if r <= 127 {
 911                 w.Write(escapedStringBytes[byte(r)])
 912             } else {
 913                 w.WriteRune(r)
 914             }
 915             continue
 916         }
 917 
 918         // handle escaped items
 919         escaped = false
 920 
 921         switch r {
 922         case 'u':
 923             // \u needs exactly 4 hex-digits to follow it
 924             w.Write([]byte{'\\', 'u'})
 925             if err := copyHex(w, 4, jr); err != nil {
 926                 return jr.improveError(err)
 927             }
 928 
 929         case 'x':
 930             // JSON only supports 4 escaped hex-digits, so pad the 2
 931             // expected hex-digits with 2 zeros
 932             w.Write([]byte{'\\', 'u', '0', '0'})
 933             if err := copyHex(w, 2, jr); err != nil {
 934                 return jr.improveError(err)
 935             }
 936 
 937         case 't', 'f', 'r', 'n', 'b', '\\', '"':
 938             // handle valid-JSON escaped string sequences
 939             w.WriteByte('\\')
 940             w.WriteByte(byte(r))
 941 
 942         case '\'':
 943             // escaped single-quotes aren't standard JSON, but they can
 944             // be handy when the input uses non-standard single-quoted
 945             // strings
 946             w.WriteByte('\'')
 947 
 948         default:
 949             if r <= 127 {
 950                 w.Write(escapedStringBytes[byte(r)])
 951             } else {
 952                 w.WriteRune(r)
 953             }
 954         }
 955     }
 956 }
 957 
 958 // copyHex handles a run of hex-digits for func handleString, starting right
 959 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its
 960 // errors with position info: that's up to the caller
 961 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
 962     for i := 0; i < n; i++ {
 963         b, err := jr.readByte()
 964         if err == io.EOF {
 965             return errStringEarlyEnd
 966         }
 967         if err != nil {
 968             return err
 969         }
 970 
 971         if b >= 128 {
 972             return errInvalidHex
 973         }
 974 
 975         if b := matchHex[b]; b != 0 {
 976             w.WriteByte(b)
 977             continue
 978         }
 979 
 980         return errInvalidHex
 981     }
 982 
 983     return nil
 984 }
 985 
 986 // handleValue is a generic JSON-token handler, which allows the recursive
 987 // behavior to handle any kind of JSON/pseudo-JSON input
 988 func handleValue(w *bufio.Writer, jr *jsonReader) error {
 989     chunk, err := jr.r.Peek(1)
 990     if err == nil && len(chunk) >= 1 {
 991         return handleValueDispatch(w, jr, chunk[0])
 992     }
 993 
 994     if err == io.EOF {
 995         return jr.improveError(errInputEarlyEnd)
 996     }
 997     return jr.improveError(errInputEarlyEnd)
 998 }
 999 
1000 // handleValueDispatch simplifies control-flow for func handleValue
1001 func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
1002     switch b {
1003     case 'f':
1004         return handleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'})
1005     case 'n':
1006         return handleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'})
1007     case 't':
1008         return handleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'})
1009     case 'F':
1010         return handleKeyword(w, jr, []byte{'F', 'a', 'l', 's', 'e'})
1011     case 'N':
1012         return handleKeyword(w, jr, []byte{'N', 'o', 'n', 'e'})
1013     case 'T':
1014         return handleKeyword(w, jr, []byte{'T', 'r', 'u', 'e'})
1015     case '.':
1016         return handleDot(w, jr)
1017     case '+':
1018         return handlePositive(w, jr)
1019     case '-':
1020         return handleNegative(w, jr)
1021     case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
1022         return handleNumber(w, jr)
1023     case '\'', '"':
1024         return handleString(w, jr)
1025     case '[':
1026         return handleArray(w, jr)
1027     case '{':
1028         return handleObject(w, jr)
1029     default:
1030         return jr.improveError(errInvalidToken)
1031     }
1032 }
1033 
// escapedStringBytes maps each possible byte value to the bytes func
// handleString emits for it inside a JSON string, following the escape
// sequences officially supported by JSON
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
//
// Control bytes (0 to 31) turn into their 2-byte escapes when they have one
// (\b, \t, \n, \f, \r), or into the 6-byte \u00xx form using lowercase
// hex-digits; double-quotes and backslashes are backslash-escaped; all other
// bytes stand for themselves.
var escapedStringBytes = func() [256][]byte {
	const hexDigits = "0123456789abcdef"

	var table [256][]byte

	// start by letting every byte stand for itself
	for i := range table {
		table[i] = []byte{byte(i)}
	}

	// control bytes use the long \u00xx form, unless overridden below
	for i := 0; i < 32; i++ {
		hi, lo := hexDigits[i>>4], hexDigits[i&0x0f]
		table[i] = []byte{'\\', 'u', '0', '0', hi, lo}
	}

	// bytes which have a dedicated 2-byte escape sequence
	table['\b'] = []byte{'\\', 'b'}
	table['\t'] = []byte{'\\', 't'}
	table['\n'] = []byte{'\\', 'n'}
	table['\f'] = []byte{'\\', 'f'}
	table['\r'] = []byte{'\\', 'r'}
	table['"'] = []byte{'\\', '"'}
	table['\\'] = []byte{'\\', '\\'}

	return table
}()