/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for json0: this version has no http(s) support. Even
the unit-tests from the original json0 are omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath json0.go
*/

package main

import (
	"bufio"
	"errors"
	"io"
	"os"
	"strconv"
	"unicode"
)

// info is the help message shown by the -h/-help options; its leading
// line-feed is skipped when it's shown
const info = `
json0 [options...] [file...]


JSON-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

All options available can either start with a single or a double-dash

    -b           buffer output, instead of flushing it after each line
    -buffered    buffer output, instead of flushing it after each line
    -h           show this help message
    -help        show this help message
    -jsonl       emit JSON Lines, when top-level value is an array
`

const (
	// bufSize is the size of both the input and the output buffers
	bufSize = 32 * 1024

	// chunkPeekSize is how many bytes the quick digit/string handlers
	// peek ahead at most
	chunkPeekSize = 16
)

// main handles the leading command-line options, picks the (single) input,
// and runs the converter chosen, exiting with a failure code on errors
func main() {
	args := os.Args[1:]
	buffered := false
	var handler handlerFunc = json0

options:
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-jsonl`, `--jsonl`:
			handler = jsonl

		default:
			break options
		}

		args = args[1:]
	}

	// an explicit `--` ends the options
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	if len(args) > 1 {
		const msg = "multiple inputs aren't allowed\n"
		os.Stderr.WriteString(msg)
		os.Exit(1)
	}

	// lines are flushed as they're made, unless buffering was asked for,
	// or the standard output is seekable, which suggests it's a file
	liveLines := !buffered
	if liveLines {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	name := `-`
	if len(args) == 1 {
		name = args[0]
	}

	if err := run(os.Stdout, name, handler, liveLines); isActualError(err) {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// handlerFunc is the signature shared by funcs json0 and jsonl
type handlerFunc func(w *bufio.Writer, r *bufio.Reader, live bool) error

// run connects the handler given to buffered versions of the output given
// and of the input named: an empty name or a dash stand for the standard
// input; the output is flushed before this func returns
func run(w io.Writer, name string, handler handlerFunc, live bool) error {
	var src io.Reader = os.Stdin

	if name != `` && name != `-` {
		f, err := os.Open(name)
		if err != nil {
			return errors.New(`can't read from file named "` + name + `"`)
		}
		defer f.Close()
		src = f
	}

	bw := bufio.NewWriterSize(w, bufSize)
	br := bufio.NewReaderSize(src, bufSize)
	// output errors are deliberately ignored: see errNoMoreOutput
	defer bw.Flush()
	return handler(bw, br, live)
}

var (
	errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
	errInputEarlyEnd   = errors.New(`unexpected end of input data`)
	errInvalidComment  = errors.New(`expected / or *`)
	errInvalidHex      = errors.New(`expected a base-16 digit`)
	errInvalidRune     = errors.New(`invalid UTF-8 bytes`)
	errInvalidToken    = errors.New(`invalid JSON token`)
	errNoDigits        = errors.New(`expected numeric digits`)
	errNoStringQuote   = errors.New(`expected " or '`)
	errNoArrayComma    = errors.New(`missing comma between array values`)
	errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
	errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
	errExtraBytes      = errors.New(`unexpected extra input bytes`)

	// errNoMoreOutput is a generic dummy output-error, which is meant to be
	// ultimately ignored, being just an excuse to quit the app immediately
	// and successfully
	errNoMoreOutput = errors.New(`no more output`)
)

// isActualError is to figure out whether not to ignore an error, and thus
// show it as an error message
func isActualError(err error) bool {
	if err == nil {
		return false
	}
	return !errors.Is(err, io.EOF) && !errors.Is(err, errNoMoreOutput)
}

// linePosError is a more descriptive kind of error, showing the source of
// the input-related problem, as a 1-based line/pos number pair in front
// of the error message
type linePosError struct {
	// line is the 1-based line count from the input
	line int

	// pos is the 1-based `horizontal` position in its line
	pos int

	// err is the error message to `decorate` with the position info
	err error
}

// Error satisfies the error interface
func (lpe linePosError) Error() string {
	where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
	return where + `: ` + lpe.err.Error()
}

// Unwrap gives access to the undecorated error, so funcs errors.Is and
// errors.As can see through the position info
func (lpe linePosError) Unwrap() error {
	return lpe.err
}

// isIdentifier improves control-flow of func handleKey, when it handles
// unquoted object keys
var isIdentifier = [256]bool{
	'_': true,

	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
	'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
	'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
	'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
	'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
	'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
	's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// matchHex both figures out if a byte is a valid ASCII hex-digit, by not
// being 0, and normalizes letter-case for the hex letters
var matchHex = [256]byte{
	'0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
	'5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
	'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
	'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
}

// json0 converts JSON/pseudo-JSON into (valid) minimal JSON; final boolean
// value isn't used, and is just there to match the signature of func jsonl
func json0(w *bufio.Writer, r *bufio.Reader, live bool) error {
	jr := jsonReader{r, 1, 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}
	return handleTopLevelValue(w, &jr)
}

// jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines: when the
// top-level value is an array, each of its items is emitted on its own line,
// otherwise this func acts like func json0; the boolean controls whether the
// output is flushed after each line
func jsonl(w *bufio.Writer, r *bufio.Reader, live bool) error {
	jr := jsonReader{r, 1, 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}

	if b, ok := jr.peekByte(); ok && (b == '[' || b == '(') {
		if err := handleArrayJSONL(w, &jr, b, live); err != nil {
			return err
		}
		// anything but junk after the array makes the input invalid, as
		// it does for any other kind of top-level value
		return jr.handleTrailingJunk()
	}

	return handleTopLevelValue(w, &jr)
}

// handleTopLevelValue handles a single top-level JSON value, along with
// anything which may follow it, for funcs json0 and jsonl
func handleTopLevelValue(w *bufio.Writer, jr *jsonReader) error {
	err := handleValue(w, jr)

	// end the only output-line with a line-feed; this also avoids showing
	// error messages on the same line as the main output, since JSON-0
	// output has no line-feeds before its last byte; output errors are
	// ignored, as the app is about to end either way
	outputByte(w, '\n')

	if err != nil {
		return err
	}
	return jr.handleTrailingJunk()
}

// closingFor gives the byte which ends an array started by the byte given:
// round parentheses are supported as an alternative to square brackets
func closingFor(start byte) byte {
	if start == '(' {
		return ')'
	}
	return ']'
}

// endLine ends the current output line for func handleArrayJSONL, flushing
// the output right away when asked to; any output failure results in the
// dummy error errNoMoreOutput
func endLine(w *bufio.Writer, live bool) error {
	if err := outputByte(w, '\n'); err != nil {
		return err
	}
	if live && w.Flush() != nil {
		return errNoMoreOutput
	}
	return nil
}

// handleArrayJSONL handles top-level arrays for func jsonl, emitting each
// array item on its own line; empty arrays result in no output at all
func handleArrayJSONL(w *bufio.Writer, jr *jsonReader, start byte, live bool) error {
	if err := jr.demandSyntax(start); err != nil {
		return err
	}
	end := closingFor(start)

	for n := 0; ; n++ {
		comma, done, err := jr.seekItem(end)
		if err != nil {
			return err
		}

		// handle end of array
		if done {
			if n == 0 {
				return nil
			}
			return endLine(w, live)
		}

		// turn commas between adjacent values into line-feeds, as the
		// output for this custom func is supposed to be JSON Lines
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err := endLine(w, live); err != nil {
				return err
			}
		}

		// handle the next value
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// jsonReader reads data via a buffer, keeping track of the input position:
// this in turn allows showing much more useful errors, when these happen
type jsonReader struct {
	// r is the actual reader
	r *bufio.Reader

	// line is the 1-based line-counter for input bytes, and gives errors
	// useful position info
	line int

	// pos is the 1-based `horizontal` position in its line, and gives
	// errors useful position info
	pos int
}

// improveError makes any error more useful, by giving it info about the
// current input-position, as a 1-based line/within-line-position pair;
// errors which already have position info are left as given, end-of-input
// errors turn into errInputEarlyEnd, and nil errors stay nil
func (jr *jsonReader) improveError(err error) error {
	if err == nil {
		return nil
	}

	if _, ok := err.(linePosError); ok {
		return err
	}

	if err == io.EOF {
		err = errInputEarlyEnd
	}
	return linePosError{jr.line, jr.pos, err}
}

// handleLeadingJunk skips anything allowed before the top-level value
func (jr *jsonReader) handleLeadingJunk() error {
	// input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
	// mark) gives no useful info if present, as UTF-8 leaves no ambiguity
	// about byte-order by design
	jr.skipUTF8BOM()

	// ignore leading whitespace and/or comments
	return jr.seekNext()
}

// handleTrailingJunk skips anything allowed after the top-level value, and
// fails when there's anything else after it
func (jr *jsonReader) handleTrailingJunk() error {
	for {
		// ignore trailing whitespace and/or comments
		if err := jr.seekNext(); err != nil {
			return err
		}

		b, ok := jr.peekByte()
		if !ok {
			return nil
		}

		// beyond whitespace, comments, and semicolons, any more bytes
		// make the whole input data invalid JSON
		if b != ';' {
			return jr.improveError(errExtraBytes)
		}

		// ignore trailing semicolons
		jr.readByte()
	}
}

// demandSyntax fails with an error when the next byte isn't the one given;
// when it is, the byte is then read/skipped, and a nil error is returned
func (jr *jsonReader) demandSyntax(syntax byte) error {
	chunk, err := jr.r.Peek(1)
	if err != nil {
		// end-of-input errors turn into errInputEarlyEnd
		return jr.improveError(err)
	}

	if len(chunk) < 1 || chunk[0] != syntax {
		msg := `expected ` + string(rune(syntax))
		return jr.improveError(errors.New(msg))
	}

	jr.readByte()
	return nil
}

// updatePosInfo does what it says, given the byte/rune just read separately
func (jr *jsonReader) updatePosInfo(r rune) {
	if r == '\n' {
		jr.line++
		jr.pos = 1
		return
	}
	jr.pos++
}

// peekByte simplifies control-flow for various other funcs: when there's
// no next byte to peek at, it gives a 0 byte and a false
func (jr *jsonReader) peekByte() (b byte, ok bool) {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return chunk[0], true
	}
	return 0, false
}

// readByte does what it says, updating the reader's position info; the end
// of input is reported as a plain io.EOF, so callers can tell it apart from
// other errors, which come with position info instead
func (jr *jsonReader) readByte() (b byte, err error) {
	b, err = jr.r.ReadByte()
	if err == nil {
		jr.updatePosInfo(rune(b))
		return b, nil
	}

	if err == io.EOF {
		return 0, io.EOF
	}
	return 0, jr.improveError(err)
}

// readRune does what it says, updating the reader's position info; the end
// of input is reported as a plain io.EOF, so callers can tell it apart from
// other errors, which come with position info instead; invalid UTF-8 input
// results in errInvalidRune
func (jr *jsonReader) readRune() (r rune, err error) {
	r, size, err := jr.r.ReadRune()
	if err == io.EOF {
		return 0, io.EOF
	}
	if err != nil {
		return 0, jr.improveError(err)
	}

	// invalid bytes are read as 1-byte replacement-chars, unlike properly
	// encoded replacement-chars, which take multiple bytes
	if r == unicode.ReplacementChar && size == 1 {
		return r, jr.improveError(errInvalidRune)
	}

	jr.updatePosInfo(r)
	return r, nil
}

// seekNext skips/seeks the next token, ignoring runs of whitespace symbols
// and comments, either single-line (starting with // or with #) or general
// (starting with /* and ending with */)
func (jr *jsonReader) seekNext() error {
	for {
		b, ok := jr.peekByte()
		if !ok {
			return nil
		}

		switch {
		case b <= 32:
			// keep skipping whitespace bytes, which are treated as being
			// any of ' ', '\t', '\f', '\v', '\r', '\n', and even any of
			// the other ASCII control bytes
			jr.readByte()

		case b == '#':
			if err := jr.skipLine(); err != nil {
				return err
			}

		case b == '/':
			// after comments, keep looking for more whitespace/comments
			if err := jr.skipComment(); err != nil {
				return err
			}

		default:
			// reached the next token
			return nil
		}
	}
}

// seekItem helps the array/object handlers find their next item: it skips
// whitespace/comments, then up to 1 comma, along with any whitespace/comments
// following it; its results are whether it found a comma, and whether it
// found (and skipped) the ending byte given; running out of input is an
// error, since that means the array/object was never ended
func (jr *jsonReader) seekItem(end byte) (comma bool, done bool, err error) {
	// there may be whitespace/comments before the next comma
	if err := jr.seekNext(); err != nil {
		return false, false, err
	}

	// handle commas between items, as well as trailing ones
	b, ok := jr.peekByte()
	if ok && b == ',' {
		jr.readByte()
		comma = true

		// there may be whitespace/comments before the ending byte
		if err := jr.seekNext(); err != nil {
			return comma, false, err
		}
		b, ok = jr.peekByte()
	}

	if !ok {
		return comma, false, jr.improveError(errInputEarlyEnd)
	}

	if b == end {
		jr.readByte()
		return comma, true, nil
	}
	return comma, false, nil
}

// skipComment helps func seekNext skip over comments, simplifying the latter
// func's control-flow; a slash followed by anything but a slash or an
// asterisk, or by nothing at all, is an error
func (jr *jsonReader) skipComment() error {
	if err := jr.demandSyntax('/'); err != nil {
		return err
	}

	switch b, _ := jr.peekByte(); b {
	case '/':
		// handle single-line comments
		return jr.skipLine()

	case '*':
		// handle (potentially) multi-line comments
		return jr.skipGeneralComment()

	default:
		return jr.improveError(errInvalidComment)
	}
}

// skipLine handles single-line comments for funcs seekNext and skipComment,
// skipping all bytes up to the next line-feed, or up to the end of input
func (jr *jsonReader) skipLine() error {
	for {
		b, err := jr.readByte()
		if err == io.EOF {
			// end of input is fine in this case
			return nil
		}
		if err != nil {
			return err
		}

		if b == '\n' {
			return nil
		}
	}
}

// skipGeneralComment handles (potentially) multi-line comments for func
// skipComment, which has already skipped the leading slash by then: the
// next byte is thus expected to be the comment's opening asterisk
func (jr *jsonReader) skipGeneralComment() error {
	// skipping the opening asterisk on its own prevents it from being
	// mistaken as part of the comment's end, as in `/*/`
	if err := jr.demandSyntax('*'); err != nil {
		return err
	}

	var prev byte
	for {
		// func readByte already keeps the line-counter up to date
		b, err := jr.readByte()
		if err != nil {
			return jr.improveError(errCommentEarlyEnd)
		}

		if prev == '*' && b == '/' {
			return nil
		}
		prev = b
	}
}

// skipUTF8BOM does what it says, if a UTF-8 BOM is present; the position
// info isn't changed, since byte-order marks aren't visible symbols
func (jr *jsonReader) skipUTF8BOM() {
	lead, err := jr.r.Peek(3)
	if err != nil || len(lead) < 3 {
		return
	}

	if lead[0] == 0xef && lead[1] == 0xbb && lead[2] == 0xbf {
		jr.r.Discard(3)
	}
}

// outputByte is a small wrapper on func WriteByte, which adapts any error
// into a custom dummy output-error, which is in turn meant to be ignored,
// being just an excuse to quit the app immediately and successfully
func outputByte(w *bufio.Writer, b byte) error {
	if err := w.WriteByte(b); err != nil {
		return errNoMoreOutput
	}
	return nil
}

// handleArray handles arrays for func handleValue: these can start/end
// either with square brackets, or with round parentheses
func handleArray(w *bufio.Writer, jr *jsonReader, start byte) error {
	if err := jr.demandSyntax(start); err != nil {
		return err
	}
	end := closingFor(start)

	w.WriteByte('[')

	for n := 0; ; n++ {
		comma, done, err := jr.seekItem(end)
		if err != nil {
			return err
		}

		// handle end of array
		if done {
			w.WriteByte(']')
			return nil
		}

		// don't forget commas between adjacent values
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next value
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handleDigits helps various number-handling funcs do their job: it copies
// a run of digits, ignoring any underscores in it, and fails when it finds
// no digits at all
func handleDigits(w *bufio.Writer, jr *jsonReader) error {
	if trySimpleDigits(w, jr) {
		return nil
	}

	digits := 0
	for {
		b, _ := jr.peekByte()

		switch {
		case b == '_':
			// support `nice` long numbers by ignoring their underscores
			jr.readByte()

		case '0' <= b && b <= '9':
			jr.readByte()
			w.WriteByte(b)
			digits++

		case digits == 0:
			return jr.improveError(errNoDigits)

		default:
			return nil
		}
	}
}

// trySimpleDigits tries to handle (more quickly) digit-runs where all bytes
// are just digits: this is a very common case for numbers; returns whether
// it succeeded, so this func's caller knows if it needs to do anything,
// the slower way
func trySimpleDigits(w *bufio.Writer, jr *jsonReader) (gotIt bool) {
	chunk, _ := jr.r.Peek(chunkPeekSize)

	for i, b := range chunk {
		if '0' <= b && b <= '9' {
			continue
		}

		if i == 0 || b == '_' {
			return false
		}

		// bulk-writing the chunk is this func's whole point
		w.Write(chunk[:i])

		// digits aren't line-feeds, so only the horizontal position changes
		jr.r.Discard(i)
		jr.pos += i
		return true
	}

	// maybe the digits-run is ok, but it's just longer than the chunk
	return false
}

// handleDot handles pseudo-JSON numbers which start with a decimal dot,
// giving them the leading 0 valid JSON requires
func handleDot(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('.'); err != nil {
		return err
	}

	w.Write([]byte{'0', '.'})
	if err := handleDigits(w, jr); err != nil {
		return err
	}
	return handleExponent(w, jr)
}

// handleExponent handles the optional exponent which can end numbers: plus
// signs right after the `e` are dropped, as they're redundant
func handleExponent(w *bufio.Writer, jr *jsonReader) error {
	b, _ := jr.peekByte()
	if b != 'e' && b != 'E' {
		return nil
	}

	jr.readByte()
	w.WriteByte(b)

	switch sign, _ := jr.peekByte(); sign {
	case '+':
		jr.readByte()
	case '-':
		jr.readByte()
		w.WriteByte('-')
	}

	return handleDigits(w, jr)
}

// handleKey is used by func handleObject and generalizes func handleString,
// by allowing unquoted object keys; it's not used anywhere else, as allowing
// unquoted string values is ambiguous with actual JSON-keyword values null,
// false, and true.
func handleKey(w *bufio.Writer, jr *jsonReader) error {
	quote, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errStringEarlyEnd)
	}

	if quote == '"' || quote == '\'' {
		return handleString(w, jr, rune(quote))
	}

	// unquoted keys are runs of ASCII letters, digits, and underscores
	w.WriteByte('"')
	for {
		b, _ := jr.peekByte()
		if !isIdentifier[b] {
			break
		}
		jr.readByte()
		w.WriteByte(b)
	}
	w.WriteByte('"')
	return nil
}

// trySimpleString tries to handle (more quickly) inner-strings where all bytes
// are unescaped ASCII symbols: this is a very common case for strings, and is
// almost always the case for object keys; returns whether it succeeded, so
// this func's caller knows if it needs to do anything, the slower way; the
// opening quote is expected to have been read already
func trySimpleString(w *bufio.Writer, jr *jsonReader, quote rune) (gotIt bool) {
	chunk, _ := jr.r.Peek(chunkPeekSize)
	end := -1

	for i, b := range chunk {
		if b == byte(quote) {
			end = i
			break
		}

		// control bytes, non-ASCII bytes, backslashes, and the other kind
		// of quote all need the slower general-purpose handler
		if b < 32 || b > 127 || b == '\\' || b == '\'' || b == '"' {
			return false
		}
	}

	// maybe the string is simple, but it's just longer than the chunk
	if end < 0 {
		return false
	}

	// bulk-writing the inner string is this func's whole point: only the
	// bytes before the closing quote are part of it
	w.WriteByte('"')
	w.Write(chunk[:end])
	w.WriteByte('"')

	// the bytes skipped have no line-feeds among them, so only the
	// horizontal position changes
	jr.r.Discard(end + 1)
	jr.pos += end + 1
	return true
}

// handleKeyword is used by func handleValueDispatch: it demands the input
// keyword given, and emits the output given in its place, which lets
// non-JSON keywords turn into their JSON counterparts
func handleKeyword(w *bufio.Writer, jr *jsonReader, kw string, out string) error {
	for i := 0; i < len(kw); i++ {
		// func readByte already keeps the position info up to date
		if b, ok := jr.peekByte(); ok && b == kw[i] {
			jr.readByte()
			continue
		}

		msg := `expected JSON value ` + kw
		return jr.improveError(errors.New(msg))
	}

	w.WriteString(out)
	return nil
}

// handleNegative handles numbers starting with a negative sign for func
// handleValue
func handleNegative(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('-'); err != nil {
		return err
	}

	w.WriteByte('-')
	if b, _ := jr.peekByte(); b == '.' {
		return handleDot(w, jr)
	}
	return handleNumber(w, jr)
}

// handleNumber handles unsigned numeric values/tokens which start with a
// digit: these have integer digits, optionally followed by a dot and
// decimal digits, optionally followed by an exponent
func handleNumber(w *bufio.Writer, jr *jsonReader) error {
	// handle integer digits
	if err := handleDigits(w, jr); err != nil {
		return err
	}

	// handle optional decimal digits, starting with a leading dot
	if b, _ := jr.peekByte(); b == '.' {
		jr.readByte()
		w.WriteByte('.')
		if err := handleDigits(w, jr); err != nil {
			return err
		}
	}

	// handle optional exponent digits, which can also follow decimals
	return handleExponent(w, jr)
}

// handleObject handles objects for func handleValue
func handleObject(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('{'); err != nil {
		return err
	}
	w.WriteByte('{')

	for npairs := 0; ; npairs++ {
		comma, done, err := jr.seekItem('}')
		if err != nil {
			return err
		}

		// handle end of object
		if done {
			w.WriteByte('}')
			return nil
		}

		// don't forget commas between adjacent key-value pairs
		if npairs > 0 {
			if !comma {
				return jr.improveError(errNoObjectComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next pair's key
		if err := handleKey(w, jr); err != nil {
			return err
		}

		// demand a colon right after the key
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := jr.demandSyntax(':'); err != nil {
			return err
		}
		w.WriteByte(':')

		// handle the next pair's value
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handlePositive handles numbers starting with a positive sign for func
// handleValue
func handlePositive(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('+'); err != nil {
		return err
	}

	// valid JSON isn't supposed to have leading pluses on numbers, so
	// emit nothing for it, unlike for negative numbers

	if b, _ := jr.peekByte(); b == '.' {
		return handleDot(w, jr)
	}
	return handleNumber(w, jr)
}

// writeStringRune emits a symbol from inside a string for func handleString,
// escaping it when valid JSON requires that
func writeStringRune(w *bufio.Writer, r rune) {
	if 0 <= r && r <= 127 {
		w.Write(escapedStringBytes[byte(r)])
		return
	}
	w.WriteRune(r)
}

// handleString handles strings for funcs handleValue and handleKey, and
// supports both single-quotes and double-quotes, always emitting the latter
// in the output, of course; the next input byte is expected to be the
// opening quote, which must match the quote given
func handleString(w *bufio.Writer, jr *jsonReader, quote rune) error {
	if quote != '"' && quote != '\'' {
		return jr.improveError(errNoStringQuote)
	}

	// skip the opening quote
	jr.readByte()

	// try the quicker no-escapes ASCII handler
	if trySimpleString(w, jr, quote) {
		return nil
	}

	// it's a non-trivial inner-string, so handle it rune-by-rune
	w.WriteByte('"')
	escaped := false

	for {
		r, err := jr.readRune()
		if err == io.EOF {
			return jr.improveError(errStringEarlyEnd)
		}
		if err != nil {
			return err
		}

		if !escaped {
			switch {
			case r == '\\':
				escaped = true
			case r == quote:
				// handle end of string
				return outputByte(w, '"')
			default:
				writeStringRune(w, r)
			}
			continue
		}

		// handle escaped items
		escaped = false

		switch r {
		case 'u':
			// \u needs exactly 4 hex-digits to follow it
			w.Write([]byte{'\\', 'u'})
			if err := copyHex(w, 4, jr); err != nil {
				return jr.improveError(err)
			}

		case 'x':
			// JSON only supports 4 escaped hex-digits, so pad the 2
			// expected hex-digits with 2 zeros
			w.Write([]byte{'\\', 'u', '0', '0'})
			if err := copyHex(w, 2, jr); err != nil {
				return jr.improveError(err)
			}

		case 't', 'f', 'r', 'n', 'b', '\\', '"':
			// handle valid-JSON escaped string sequences
			w.WriteByte('\\')
			w.WriteByte(byte(r))

		case '\'':
			// escaped single-quotes aren't standard JSON, but they can
			// be handy when the input uses non-standard single-quoted
			// strings
			w.WriteByte('\'')

		default:
			// any other escaped symbol stands for itself
			writeStringRune(w, r)
		}
	}
}

// copyHex handles a run of hex-digits for func handleString, starting right
// after the leading `\u` (or `\x`) part; this func doesn't `improve` its
// own errors with position info: that's up to the caller
func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
	for i := 0; i < n; i++ {
		b, err := jr.readByte()
		if err == io.EOF {
			return errStringEarlyEnd
		}
		if err != nil {
			return err
		}

		// non-hex bytes, which include all non-ASCII ones, match as 0
		hex := matchHex[b]
		if hex == 0 {
			return errInvalidHex
		}
		w.WriteByte(hex)
	}

	return nil
}

// handleValue is a generic JSON-token handler, which allows the recursive
// behavior to handle any kind of JSON/pseudo-JSON input
func handleValue(w *bufio.Writer, jr *jsonReader) error {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return handleValueDispatch(w, jr, chunk[0])
	}

	if err == nil {
		err = io.EOF
	}
	// end-of-input errors turn into errInputEarlyEnd
	return jr.improveError(err)
}

// handleValueDispatch simplifies control-flow for func handleValue, picking
// what to do from the leading byte of the value
func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
	switch b {
	case 'f':
		return handleKeyword(w, jr, `false`, `false`)
	case 'n':
		return handleKeyword(w, jr, `null`, `null`)
	case 't':
		return handleKeyword(w, jr, `true`, `true`)
	case 'F':
		// capitalized keywords aren't JSON: emit their JSON counterparts
		return handleKeyword(w, jr, `False`, `false`)
	case 'N':
		return handleKeyword(w, jr, `None`, `null`)
	case 'T':
		return handleKeyword(w, jr, `True`, `true`)
	case '.':
		return handleDot(w, jr)
	case '+':
		return handlePositive(w, jr)
	case '-':
		return handleNegative(w, jr)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return handleNumber(w, jr)
	case '\'', '"':
		return handleString(w, jr, rune(b))
	case '[', '(':
		return handleArray(w, jr, b)
	case '{':
		return handleObject(w, jr)
	default:
		return jr.improveError(errInvalidToken)
	}
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = newEscapedStringBytes()

// newEscapedStringBytes makes the lookup table escapedStringBytes: control
// bytes turn into escape sequences, using the short ones where available,
// double-quotes and backslashes are escaped, and all other bytes stand for
// themselves
func newEscapedStringBytes() [256][]byte {
	const hexDigits = `0123456789abcdef`

	var table [256][]byte
	for i := range table {
		b := byte(i)
		if b < 32 {
			hi, lo := hexDigits[b>>4], hexDigits[b&0x0f]
			table[i] = []byte{'\\', 'u', '0', '0', hi, lo}
			continue
		}
		table[i] = []byte{b}
	}

	table['\b'] = []byte{'\\', 'b'}
	table['\t'] = []byte{'\\', 't'}
	table['\n'] = []byte{'\\', 'n'}
	table['\f'] = []byte{'\\', 'f'}
	table['\r'] = []byte{'\\', 'r'}
	table['"'] = []byte{'\\', '"'}
	table['\\'] = []byte{'\\', '\\'}
	return table
}