// File: json0.go

/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for json0: this version has no http(s) support. Even
the unit-tests from the original json0 are omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath json0.go
*/

package main

import (
	"bufio"
	"errors"
	"io"
	"os"
	"strconv"
	"unicode"
)

// info is the help message: its leading line-feed is there only to make the
// source look nicer, and is skipped when the message is shown
const info = `
json0 [options...] [file...]


JSON-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

All options available can either start with a single or a double-dash

    -b, -buffered    don't flush the output after each JSON Lines item
    -h, -help        show this help message
    -jsonl           emit JSON Lines, when top-level value is an array
`

const (
	// bufSize is the size of both the input and the output buffers
	bufSize = 32 * 1024

	// chunkPeekSize is how many bytes the `try-simple` fast-paths look at
	chunkPeekSize = 16
)

// main parses the leading options, picks the single input (a named file, or
// standard input), and runs the chosen converter on it
func main() {
	args := os.Args[1:]
	buffered := false
	handler := json0

options:
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-jsonl`, `--jsonl`:
			handler = jsonl

		default:
			// the first non-option argument ends the options
			break options
		}

		args = args[1:]
	}

	// an explicit double-dash separates options from the input name
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	if len(args) > 1 {
		const msg = "multiple inputs aren't allowed\n"
		os.Stderr.WriteString(msg)
		os.Exit(1)
		return
	}

	// flushing after each line only matters when the output isn't seekable,
	// which is the case for pipes and terminals, but not for regular files
	liveLines := !buffered
	if liveLines {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	name := `-`
	if len(args) == 1 {
		name = args[0]
	}

	// a bare io.EOF is the dummy error func outputByte uses when the output
	// can't be written to anymore, which is a reason to quit successfully
	if err := run(os.Stdout, name, handler, liveLines); err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
		return
	}
}

// handlerFunc is the signature shared by funcs json0 and jsonl
type handlerFunc func(w *bufio.Writer, r *bufio.Reader, live bool) error

// run opens the input named (an empty name or a dash mean standard input),
// wraps both input and output into buffers, and calls the handler given;
// the output buffer is always flushed before this func returns
func run(w io.Writer, name string, handler handlerFunc, live bool) error {
	// f, _ := os.Create(`json0.prof`)
	// defer f.Close()
	// pprof.StartCPUProfile(f)
	// defer pprof.StopCPUProfile()

	var src io.Reader = os.Stdin
	if name != `` && name != `-` {
		f, err := os.Open(name)
		if err != nil {
			return errors.New(`can't read from file named "` + name + `"`)
		}
		defer f.Close()
		src = f
	}

	bw := bufio.NewWriterSize(w, bufSize)
	br := bufio.NewReaderSize(src, bufSize)
	defer bw.Flush()
	return handler(bw, br, live)
}

var (
	errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
	errInputEarlyEnd   = errors.New(`unexpected end of input data`)
	errInvalidComment  = errors.New(`expected / or *`)
	errInvalidHex      = errors.New(`expected a base-16 digit`)
	errInvalidRune     = errors.New(`invalid UTF-8 bytes`)
	errInvalidToken    = errors.New(`invalid JSON token`)
	errNoDigits        = errors.New(`expected numeric digits`)
	errNoStringQuote   = errors.New(`expected " or '`)
	errNoArrayComma    = errors.New(`missing comma between array values`)
	errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
	errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
	errExtraBytes      = errors.New(`unexpected extra input bytes`)
)

// linePosError is a more descriptive kind of error, showing the source of
// the input-related problem, as a 1-based line/pos number pair in front
// of the error message
type linePosError struct {
	// line is the 1-based line count from the input
	line int

	// pos is the 1-based `horizontal` position in its line
	pos int

	// err is the error message to `decorate` with the position info
	err error
}

// Error satisfies the error interface
func (lpe linePosError) Error() string {
	where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
	return where + `: ` + lpe.err.Error()
}

// isIdentifier improves control-flow of func handleKey, when it handles
// unquoted object keys
var isIdentifier = [256]bool{
	'_': true,

	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
	'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
	'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
	'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
	'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
	'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
	's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// matchHex both figures out if a byte is a valid ASCII hex-digit, by not
// being 0, and normalizes letter-case for the hex letters
var matchHex = [256]byte{
	'0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
	'5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
	'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
	'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
}

// json0 converts JSON/pseudo-JSON into (valid) minimal JSON; final boolean
// value isn't used, and is just there to match the signature of func jsonl
func json0(w *bufio.Writer, r *bufio.Reader, live bool) error {
	jr := jsonReader{r, 1, 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}
	return handleTopLevelValue(w, &jr)
}

// jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines: when the
// top-level value is an array, each of its items is emitted on its own line;
// any other top-level value is handled the same way func json0 does; when
// the boolean value is true, the output is flushed after each line
func jsonl(w *bufio.Writer, r *bufio.Reader, live bool) error {
	jr := jsonReader{r, 1, 1}

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}

	if b, ok := jr.peekByte(); ok && (b == '[' || b == '(') {
		if err := handleArrayJSONL(w, &jr, b, live); err != nil {
			return err
		}
		// anything but whitespace/comments after the array is an error,
		// exactly like it is after any other top-level value
		return jr.handleTrailingJunk()
	}

	return handleTopLevelValue(w, &jr)
}

// handleTopLevelValue handles a single top-level JSON value, followed by
// nothing else except whitespace, comments, and semicolons
func handleTopLevelValue(w *bufio.Writer, jr *jsonReader) error {
	err := handleValue(w, jr)

	// end the only output-line with a line-feed; this also avoids showing
	// error messages on the same line as the main output, since JSON-0
	// output has no line-feeds before its last byte
	outputByte(w, '\n')

	if err != nil {
		return err
	}
	return jr.handleTrailingJunk()
}

// handleArrayJSONL handles top-level arrays for func jsonl, emitting each
// array item on its own line; empty arrays result in no output at all
func handleArrayJSONL(w *bufio.Writer, jr *jsonReader, start byte, live bool) error {
	if err := jr.demandSyntax(start); err != nil {
		return err
	}

	var end byte = ']'
	if start == '(' {
		end = ')'
	}

	for n := 0; ; n++ {
		// there may be whitespace/comments before the next comma
		if err := jr.seekNext(); err != nil {
			return err
		}

		// handle commas between values, as well as trailing ones
		comma := false
		b, _ := jr.peekByte()
		if b == ',' {
			jr.readByte()
			comma = true

			// there may be whitespace/comments before an ending ']'
			if err := jr.seekNext(); err != nil {
				return err
			}
			b, _ = jr.peekByte()
		}

		// handle end of array
		if b == end {
			jr.readByte()
			if n == 0 {
				return nil
			}

			// end the last item's line
			err := outputByte(w, '\n')
			if live {
				w.Flush()
			}
			return err
		}

		// turn commas between adjacent values into line-feeds, as the
		// output for this custom func is supposed to be JSON Lines
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err := outputByte(w, '\n'); err != nil {
				return err
			}
			if live {
				w.Flush()
			}
		}

		// handle the next value
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// jsonReader reads data via a buffer, keeping track of the input position:
// this in turn allows showing much more useful errors, when these happen
type jsonReader struct {
	// r is the actual reader
	r *bufio.Reader

	// line is the 1-based line-counter for input bytes, and gives errors
	// useful position info
	line int

	// pos is the 1-based `horizontal` position in its line, and gives
	// errors useful position info
	pos int
}

// improveError makes any error more useful, by giving it info about the
// current input-position, as a 1-based line/within-line-position pair;
// errors which already have position info are returned as given, a nil
// error stays nil, and io.EOF turns into an unexpected-end-of-input error
func (jr *jsonReader) improveError(err error) error {
	if _, ok := err.(linePosError); ok {
		return err
	}

	if err == io.EOF {
		return linePosError{jr.line, jr.pos, errInputEarlyEnd}
	}
	if err != nil {
		return linePosError{jr.line, jr.pos, err}
	}
	return nil
}

// handleLeadingJunk skips everything which can come before the top-level
// value: a UTF-8 byte-order mark, whitespace, and comments
func (jr *jsonReader) handleLeadingJunk() error {
	// input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
	// mark) gives no useful info if present, as UTF-8 leaves no ambiguity
	// about byte-order by design
	jr.skipUTF8BOM()

	// ignore leading whitespace and/or comments
	return jr.seekNext()
}

// handleTrailingJunk skips whitespace, comments, and semicolons after the
// top-level value, failing if anything else follows it
func (jr *jsonReader) handleTrailingJunk() error {
	// ignore trailing whitespace and/or comments
	if err := jr.seekNext(); err != nil {
		return err
	}

	// ignore trailing semicolons
	for {
		if b, ok := jr.peekByte(); !ok || b != ';' {
			break
		}

		jr.readByte()
		// ignore trailing whitespace and/or comments
		if err := jr.seekNext(); err != nil {
			return err
		}
	}

	// beyond trailing whitespace and/or comments, any more bytes
	// make the whole input data invalid JSON
	if _, ok := jr.peekByte(); ok {
		return jr.improveError(errExtraBytes)
	}
	return nil
}

// demandSyntax fails with an error when the next byte isn't the one given;
// when it is, the byte is then read/skipped, and a nil error is returned
func (jr *jsonReader) demandSyntax(syntax byte) error {
	chunk, err := jr.r.Peek(1)
	if err != nil {
		// func improveError also turns io.EOF into a more descriptive error
		return jr.improveError(err)
	}

	if len(chunk) < 1 || chunk[0] != syntax {
		msg := `expected ` + string(rune(syntax))
		return jr.improveError(errors.New(msg))
	}

	jr.readByte()
	return nil
}

// peekByte simplifies control-flow for various other funcs: the boolean
// result is false when there's no next byte to look at, for any reason
func (jr *jsonReader) peekByte() (b byte, ok bool) {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return chunk[0], true
	}
	return 0, false
}

// advance updates the position info, after reading a single byte/rune
func (jr *jsonReader) advance(lineFeed bool) {
	if lineFeed {
		jr.line++
		jr.pos = 1
		return
	}
	jr.pos++
}

// readByte does what it says, updating the reader's position info; the end
// of input is reported as a bare io.EOF, so callers can tell it apart from
// other errors, which come decorated with position info instead
func (jr *jsonReader) readByte() (byte, error) {
	b, err := jr.r.ReadByte()
	if err == io.EOF {
		return b, io.EOF
	}
	if err != nil {
		return b, jr.improveError(err)
	}

	jr.advance(b == '\n')
	return b, nil
}

// readRune does what it says, updating the reader's position info; the end
// of input is reported as a bare io.EOF, just like func readByte does, while
// invalid UTF-8 input results in a position-decorated error
func (jr *jsonReader) readRune() (rune, error) {
	r, size, err := jr.r.ReadRune()
	if err == io.EOF {
		return r, io.EOF
	}
	if err != nil {
		return r, jr.improveError(err)
	}

	// invalid UTF-8 shows up as a 1-byte replacement char: a valid U+FFFD
	// in the input takes 3 bytes instead, and is perfectly fine
	if r == unicode.ReplacementChar && size == 1 {
		return r, jr.improveError(errInvalidRune)
	}

	jr.advance(r == '\n')
	return r, nil
}

// seekNext skips/seeks the next token, ignoring runs of whitespace symbols
// and comments, either single-line (starting with // or with #) or general
// (starting with /* and ending with */)
func (jr *jsonReader) seekNext() error {
	for {
		b, ok := jr.peekByte()
		if !ok {
			return nil
		}

		switch {
		case b <= 32:
			// keep skipping whitespace bytes, which are mainly
			// ' ', '\t', '\f', '\v', '\r', and '\n'
			jr.readByte()

		case b == '#':
			if err := jr.skipLine(); err != nil {
				return err
			}

		case b == '/':
			// after comments, keep looking for more whitespace/comments
			if err := jr.skipComment(); err != nil {
				return err
			}

		default:
			// reached the next token
			return nil
		}
	}
}

// skipComment helps func seekNext skip over comments, simplifying the latter
// func's control-flow
func (jr *jsonReader) skipComment() error {
	if err := jr.demandSyntax('/'); err != nil {
		return err
	}

	b, ok := jr.peekByte()
	if !ok {
		// a lone slash at the end of the input isn't a comment
		return jr.improveError(errInvalidComment)
	}

	switch b {
	case '/':
		// handle single-line comments
		return jr.skipLine()

	case '*':
		// handle (potentially) multi-line comments
		return jr.skipGeneralComment()

	default:
		return jr.improveError(errInvalidComment)
	}
}

// skipLine handles single-line comments for funcs seekNext and skipComment,
// by skipping all bytes up to the next line-feed, which is skipped as well
func (jr *jsonReader) skipLine() error {
	for {
		b, err := jr.readByte()
		if err == io.EOF {
			// end of input is fine in this case
			return nil
		}
		if err != nil {
			return err
		}

		if b == '\n' {
			return nil
		}
	}
}

// skipGeneralComment handles (potentially) multi-line comments for func
// skipComment, which leaves the asterisk after the leading slash unread
func (jr *jsonReader) skipGeneralComment() error {
	// skip the asterisk starting the comment, so it can't be mistaken
	// for part of the comment's end, as in the 3-byte sequence /*/
	if err := jr.demandSyntax('*'); err != nil {
		return err
	}

	var prev byte
	for {
		// func readByte already keeps track of lines
		b, err := jr.readByte()
		if err != nil {
			return jr.improveError(errCommentEarlyEnd)
		}

		if prev == '*' && b == '/' {
			return nil
		}
		prev = b
	}
}

// skipUTF8BOM does what it says, if a UTF-8 BOM is present
func (jr *jsonReader) skipUTF8BOM() {
	lead, err := jr.r.Peek(3)
	if err != nil {
		return
	}

	if len(lead) > 2 && lead[0] == 0xef && lead[1] == 0xbb && lead[2] == 0xbf {
		// a BOM is invisible, so it doesn't count for position info
		jr.r.Discard(3)
	}
}

// outputByte is a small wrapper on func WriteByte, which adapts any error
// into a custom dummy output-error, which is in turn meant to be ignored,
// being just an excuse to quit the app immediately and successfully
func outputByte(w *bufio.Writer, b byte) error {
	if err := w.WriteByte(b); err != nil {
		return io.EOF
	}
	return nil
}

// handleArray handles arrays for func handleValue: arrays can also be
// surrounded by round parentheses, but are always emitted using square
// brackets
func handleArray(w *bufio.Writer, jr *jsonReader, start byte) error {
	if err := jr.demandSyntax(start); err != nil {
		return err
	}

	var end byte = ']'
	if start == '(' {
		end = ')'
	}

	w.WriteByte('[')

	for n := 0; ; n++ {
		// there may be whitespace/comments before the next comma
		if err := jr.seekNext(); err != nil {
			return err
		}

		// handle commas between values, as well as trailing ones
		comma := false
		b, _ := jr.peekByte()
		if b == ',' {
			jr.readByte()
			comma = true

			// there may be whitespace/comments before an ending ']'
			if err := jr.seekNext(); err != nil {
				return err
			}
			b, _ = jr.peekByte()
		}

		// handle end of array
		if b == end {
			jr.readByte()
			w.WriteByte(']')
			return nil
		}

		// don't forget commas between adjacent values
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next value
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handleDigits helps various number-handling funcs do their job: it copies
// a run of digits, ignoring any underscores in it, and fails when the run
// has no digits at all
func handleDigits(w *bufio.Writer, jr *jsonReader) error {
	if trySimpleDigits(w, jr) {
		return nil
	}

	// count actual digits, since underscores alone don't make a number
	digits := 0

	for {
		b, _ := jr.peekByte()

		switch {
		case b == '_':
			// support `nice` long numbers by ignoring their underscores
			jr.readByte()

		case '0' <= b && b <= '9':
			jr.readByte()
			w.WriteByte(b)
			digits++

		case digits == 0:
			return jr.improveError(errNoDigits)

		default:
			return nil
		}
	}
}

// trySimpleDigits tries to handle (more quickly) digit-runs where all bytes
// are just digits: this is a very common case for numbers; returns whether
// it succeeded, so this func's caller knows if it needs to do anything, the
// slower way
func trySimpleDigits(w *bufio.Writer, jr *jsonReader) (gotIt bool) {
	chunk, _ := jr.r.Peek(chunkPeekSize)

	for i, b := range chunk {
		if '0' <= b && b <= '9' {
			continue
		}

		if i == 0 || b == '_' {
			return false
		}

		// bulk-writing the chunk is this func's whole point
		w.Write(chunk[:i])

		jr.r.Discard(i)
		jr.pos += i
		return true
	}

	// maybe the digits-run is ok, but it's just longer than the chunk
	return false
}

// handleExponent handles the optional exponent which can end a number: the
// redundant plus sign exponents can start with is never emitted
func handleExponent(w *bufio.Writer, jr *jsonReader) error {
	b, _ := jr.peekByte()
	if b != 'e' && b != 'E' {
		return nil
	}

	jr.readByte()
	w.WriteByte(b)

	switch sign, _ := jr.peekByte(); sign {
	case '+':
		jr.readByte()
	case '-':
		jr.readByte()
		w.WriteByte('-')
	}
	return handleDigits(w, jr)
}

// handleLeadingDot handles the rest of pseudo-JSON numbers which start with
// a decimal dot, right after that dot has been read: the missing leading
// zero valid JSON requires is emitted first
func handleLeadingDot(w *bufio.Writer, jr *jsonReader) error {
	w.Write([]byte{'0', '.'})
	if err := handleDigits(w, jr); err != nil {
		return err
	}
	return handleExponent(w, jr)
}

// handleDot handles pseudo-JSON numbers which start with a decimal dot
func handleDot(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('.'); err != nil {
		return err
	}
	return handleLeadingDot(w, jr)
}

// handleKey is used by func handleObject and generalizes func handleString,
// by allowing unquoted object keys; it's not used anywhere else, as allowing
// unquoted string values is ambiguous with actual JSON-keyword values null,
// false, and true.
func handleKey(w *bufio.Writer, jr *jsonReader) error {
	quote, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errStringEarlyEnd)
	}

	if quote == '"' || quote == '\'' {
		return handleString(w, jr, quote)
	}

	// unquoted keys end right before the first non-identifier byte
	w.WriteByte('"')
	for {
		b, _ := jr.peekByte()
		if !isIdentifier[b] {
			w.WriteByte('"')
			return nil
		}

		jr.readByte()
		w.WriteByte(b)
	}
}

// trySimpleString tries to handle (more quickly) inner-strings where all bytes
// are unescaped ASCII symbols: this is a very common case for strings, and is
// almost always the case for object keys; returns whether it succeeded, so
// this func's caller knows if it needs to do anything, the slower way
func trySimpleString(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
	end := -1
	chunk, _ := jr.r.Peek(chunkPeekSize)

	for i, b := range chunk {
		if 32 <= b && b <= 127 && b != '\\' && b != '\'' && b != '"' {
			continue
		}

		if b != quote {
			// escapes, non-ASCII bytes, and the other kind of quote
			// all need the slower way
			return false
		}

		end = i
		break
	}

	if end < 0 {
		return false
	}

	// bulk-writing the chunk is this func's whole point
	w.WriteByte('"')
	w.Write(chunk[:end])
	w.WriteByte('"')

	jr.r.Discard(end + 1)
	jr.pos += end + 1
	return true
}

// handleKeyword is used by func handleValueDispatch to handle the JSON
// keywords false, null, and true
func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error {
	return replaceKeyword(w, jr, kw, kw)
}

// replaceKeyword demands the keyword given from the input, emitting its
// replacement instead: this allows turning non-JSON keywords into their
// JSON equivalents
func replaceKeyword(w *bufio.Writer, jr *jsonReader, kw, with []byte) error {
	for _, expected := range kw {
		// func readByte already updates the position info
		b, err := jr.readByte()
		if err != nil || b != expected {
			msg := `expected JSON value ` + string(kw)
			return jr.improveError(errors.New(msg))
		}
	}

	w.Write(with)
	return nil
}

// handleNegative handles numbers starting with a negative sign for func
// handleValue
func handleNegative(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('-'); err != nil {
		return err
	}

	w.WriteByte('-')
	if b, _ := jr.peekByte(); b == '.' {
		jr.readByte()
		return handleLeadingDot(w, jr)
	}
	return handleNumber(w, jr)
}

// handleNumber handles numeric values/tokens which start with a digit: these
// have integer digits, optionally followed by a dot and decimal digits, and
// optionally end with an exponent
func handleNumber(w *bufio.Writer, jr *jsonReader) error {
	// handle integer digits
	if err := handleDigits(w, jr); err != nil {
		return err
	}

	// handle optional decimal digits, starting with a leading dot
	if b, _ := jr.peekByte(); b == '.' {
		jr.readByte()
		w.WriteByte('.')
		if err := handleDigits(w, jr); err != nil {
			return err
		}
	}

	// handle optional exponent digits, whether or not decimals came before
	return handleExponent(w, jr)
}

// handleObject handles objects for func handleValue
func handleObject(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('{'); err != nil {
		return err
	}
	w.WriteByte('{')

	for npairs := 0; ; npairs++ {
		// there may be whitespace/comments before the next comma
		if err := jr.seekNext(); err != nil {
			return err
		}

		// handle commas between key-value pairs, as well as trailing ones
		comma := false
		b, _ := jr.peekByte()
		if b == ',' {
			jr.readByte()
			comma = true

			// there may be whitespace/comments before an ending '}'
			if err := jr.seekNext(); err != nil {
				return err
			}
			b, _ = jr.peekByte()
		}

		// handle end of object
		if b == '}' {
			jr.readByte()
			w.WriteByte('}')
			return nil
		}

		// don't forget commas between adjacent key-value pairs
		if npairs > 0 {
			if !comma {
				return jr.improveError(errNoObjectComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next pair's key
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := handleKey(w, jr); err != nil {
			return err
		}

		// demand a colon right after the key
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := jr.demandSyntax(':'); err != nil {
			return err
		}
		w.WriteByte(':')

		// handle the next pair's value
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handlePositive handles numbers starting with a positive sign for func
// handleValue
func handlePositive(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('+'); err != nil {
		return err
	}

	// valid JSON isn't supposed to have leading pluses on numbers, so
	// emit nothing for it, unlike for negative numbers

	if b, _ := jr.peekByte(); b == '.' {
		jr.readByte()
		return handleLeadingDot(w, jr)
	}
	return handleNumber(w, jr)
}

// handleString handles strings for funcs handleValue and handleKey, and
// supports both single-quotes and double-quotes, always emitting the latter
// in the output, of course
func handleString(w *bufio.Writer, jr *jsonReader, quote byte) error {
	if quote != '"' && quote != '\'' {
		return errNoStringQuote
	}

	// skip the leading quote, which callers have only peeked at
	jr.readByte()

	// try the quicker no-escapes ASCII handler
	if trySimpleString(w, jr, quote) {
		return nil
	}

	// it's a non-trivial inner-string, so handle it rune-by-rune
	w.WriteByte('"')
	ending := rune(quote)
	escaped := false

	for {
		r, err := jr.readRune()
		if err == io.EOF {
			return jr.improveError(errStringEarlyEnd)
		}
		if err != nil {
			return jr.improveError(err)
		}

		if !escaped {
			if r == '\\' {
				escaped = true
				continue
			}

			// handle end of string
			if r == ending {
				return outputByte(w, '"')
			}

			writeStringRune(w, r)
			continue
		}

		// handle escaped items
		escaped = false

		switch r {
		case 'u':
			// \u needs exactly 4 hex-digits to follow it
			w.Write([]byte{'\\', 'u'})
			if err := copyHex(w, 4, jr); err != nil {
				return jr.improveError(err)
			}

		case 'x':
			// JSON only supports 4 escaped hex-digits, so pad the 2
			// expected hex-digits with 2 zeros
			w.Write([]byte{'\\', 'u', '0', '0'})
			if err := copyHex(w, 2, jr); err != nil {
				return jr.improveError(err)
			}

		case 't', 'f', 'r', 'n', 'b', '\\', '"':
			// handle valid-JSON escaped string sequences
			w.WriteByte('\\')
			w.WriteByte(byte(r))

		case '\'':
			// escaped single-quotes aren't standard JSON, but they can
			// be handy when the input uses non-standard single-quoted
			// strings
			w.WriteByte('\'')

		default:
			// any other escaped symbol just stands for itself
			writeStringRune(w, r)
		}
	}
}

// writeStringRune emits a rune as part of a JSON string, escaping it when
// JSON requires that
func writeStringRune(w *bufio.Writer, r rune) {
	if r <= 127 {
		w.Write(escapedStringBytes[byte(r)])
		return
	}
	w.WriteRune(r)
}

// copyHex handles a run of hex-digits for func handleString, starting right
// after the leading `\u` (or `\x`) part; this func doesn't `improve` its
// errors with position info: that's up to the caller
func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
	for i := 0; i < n; i++ {
		b, err := jr.readByte()
		if err == io.EOF {
			return errStringEarlyEnd
		}
		if err != nil {
			return err
		}

		// non-hex bytes, which include all non-ASCII ones, match as 0
		hex := matchHex[b]
		if hex == 0 {
			return errInvalidHex
		}
		w.WriteByte(hex)
	}

	return nil
}

// handleValue is a generic JSON-token handler, which allows the recursive
// behavior to handle any kind of JSON/pseudo-JSON input
func handleValue(w *bufio.Writer, jr *jsonReader) error {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return handleValueDispatch(w, jr, chunk[0])
	}

	if err == nil {
		err = io.EOF
	}
	// the end of input turns into a descriptive error, while other errors
	// keep their own message, and all of them get position info
	return jr.improveError(err)
}

// handleValueDispatch simplifies control-flow for func handleValue
func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
	switch b {
	case '#':
		// callers normally skip comments before values, but when they
		// don't, a value is still expected after the comment
		if err := jr.seekNext(); err != nil {
			return err
		}
		return handleValue(w, jr)
	case 'f':
		return handleKeyword(w, jr, []byte(`false`))
	case 'n':
		return handleKeyword(w, jr, []byte(`null`))
	case 't':
		return handleKeyword(w, jr, []byte(`true`))
	case 'F':
		return replaceKeyword(w, jr, []byte(`False`), []byte(`false`))
	case 'N':
		return replaceKeyword(w, jr, []byte(`None`), []byte(`null`))
	case 'T':
		return replaceKeyword(w, jr, []byte(`True`), []byte(`true`))
	case '.':
		return handleDot(w, jr)
	case '+':
		return handlePositive(w, jr)
	case '-':
		return handleNegative(w, jr)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return handleNumber(w, jr)
	case '\'', '"':
		return handleString(w, jr, b)
	case '[', '(':
		return handleArray(w, jr, b)
	case '{':
		return handleObject(w, jr)
	default:
		return jr.improveError(errInvalidToken)
	}
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = makeEscapedStringBytes()

// makeEscapedStringBytes builds the lookup table for escapedStringBytes:
// control bytes become \u escapes, except those with dedicated 2-byte
// escapes, double-quotes and backslashes get a backslash in front of them,
// and all other bytes stand for themselves
func makeEscapedStringBytes() (table [256][]byte) {
	const hex = `0123456789abcdef`

	for i := range table {
		b := byte(i)
		if b < 32 {
			table[i] = []byte{'\\', 'u', '0', '0', hex[b>>4], hex[b&15]}
			continue
		}
		table[i] = []byte{b}
	}

	table['\b'] = []byte{'\\', 'b'}
	table['\t'] = []byte{'\\', 't'}
	table['\n'] = []byte{'\\', 'n'}
	table['\f'] = []byte{'\\', 'f'}
	table['\r'] = []byte{'\\', 'r'}
	table['"'] = []byte{'\\', '"'}
	table['\\'] = []byte{'\\', '\\'}
	return table
}