/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for j0: this version has no http(s) support. Even
the unit-tests from the original j0 are omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath j0.go
*/

package main

import (
	"bufio"
	"bytes"
	"errors"
	"io"
	"os"
	"strconv"
	"unicode"
)

// info is the help message; its leading line-feed is skipped when shown
const info = `
j0 [options...] [file...]


Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

All options available can either start with a single or a double-dash

    -h        show this help message
    -help     show this help message
    -jsonl    emit JSON Lines, when top-level value is an array
`

const (
	// bufSize is the size of both the input and the output buffers
	bufSize = 32 * 1024

	// errorStyle is the ANSI escape sequence which starts error messages
	errorStyle = "\x1b[31m"
)

// main handles the command-line options, runs the chosen converter on the
// (optional) named input, and exits with code 1 on actual errors
func main() {
	args := os.Args[1:]
	handler := handlerFunc(json0)

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stderr.WriteString(info[1:])
			return
		case `-jsonl`, `--jsonl`:
			handler = jsonl
			args = args[1:]
		}
	}

	if len(args) > 1 {
		const msg = `only 1 (optional) named input is supported`
		os.Stderr.WriteString(errorStyle + msg + "\x1b[0m\n")
		os.Exit(1)
	}

	name := `-`
	if len(args) == 1 {
		name = args[0]
	}

	if err := run(os.Stdout, name, handler); isActualError(err) {
		os.Stderr.WriteString(errorStyle + err.Error() + "\x1b[0m\n")
		os.Exit(1)
	}
}

// handlerFunc is the kind of func which converts all input into output
type handlerFunc func(w *bufio.Writer, r *bufio.Reader) error

// run feeds the handler given buffered versions of the writer given and of
// the input named: an empty name or a single dash stand for standard input;
// the output is always flushed before returning
func run(w io.Writer, name string, handler handlerFunc) error {
	var src io.Reader = os.Stdin
	if name != `` && name != `-` {
		f, err := os.Open(name)
		if err != nil {
			return errors.New(`can't read from file named "` + name + `"`)
		}
		defer f.Close()
		src = f
	}

	bw := bufio.NewWriterSize(w, bufSize)
	br := bufio.NewReaderSize(src, bufSize)
	defer bw.Flush()
	return handler(bw, br)
}

var (
	errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
	errInputEarlyEnd   = errors.New(`unexpected end of input data`)
	errInvalidComment  = errors.New(`expected / or *`)
	errInvalidHex      = errors.New(`expected a base-16 digit`)
	errInvalidRune     = errors.New(`invalid UTF-8 bytes`)
	errInvalidToken    = errors.New(`invalid JSON token`)
	errNoDigits        = errors.New(`expected numeric digits`)
	errNoKey           = errors.New(`expected an object key`)
	errNoStringQuote   = errors.New(`expected " or '`)
	errNoArrayComma    = errors.New(`missing comma between array values`)
	errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
	errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
	errExtraBytes      = errors.New(`unexpected extra input bytes`)

	// errNoMoreOutput is a generic dummy output-error, which is meant to be
	// ultimately ignored, being just an excuse to quit the app immediately
	// and successfully
	errNoMoreOutput = errors.New(`no more output`)
)

// isActualError is to figure out whether not to ignore an error, and thus
// show it as an error message
func isActualError(err error) bool {
	if err == nil {
		return false
	}
	return !errors.Is(err, io.EOF) && !errors.Is(err, errNoMoreOutput)
}

// linePosError is a more descriptive kind of error, showing the source of
// the input-related problem, as a 1-based line/pos number pair in front
// of the error message
type linePosError struct {
	// line is the 1-based line count from the input
	line int

	// pos is the 1-based `horizontal` position in its line
	pos int

	// err is the error message to `decorate` with the position info
	err error
}

// Error satisfies the error interface
func (lpe linePosError) Error() string {
	where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
	return where + `: ` + lpe.err.Error()
}

// isIdentifier improves control-flow of func handleKey, when it handles
// unquoted object keys
var isIdentifier = [256]bool{
	'_': true,

	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
	'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
	'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
	'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
	'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
	'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
	's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// matchHex both figures out if a byte is a valid ASCII hex-digit, by not
// being 0, and normalizes letter-case for the hex letters
var matchHex = [256]byte{
	'0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
	'5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
	'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
	'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
}

// json0 converts JSON/pseudo-JSON into (valid) minimal JSON
func json0(w *bufio.Writer, r *bufio.Reader) error {
	jr := jsonReader{r: r, line: 1, pos: 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}
	return handleTopLevel(w, &jr)
}

// jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines: top-level
// arrays are emitted one item per line, while any other top-level value is
// emitted as a single line, the same way func json0 does it
func jsonl(w *bufio.Writer, r *bufio.Reader) error {
	jr := jsonReader{r: r, line: 1, pos: 1}

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}

	if b, ok := jr.peekByte(); ok && b == '[' {
		if err := handleArrayJSONL(w, &jr); err != nil {
			return err
		}
		// anything but whitespace/comments after the array is an error,
		// the same as it is for all other top-level values
		return jr.handleTrailingJunk()
	}

	return handleTopLevel(w, &jr)
}

// handleTopLevel handles a single top-level JSON value, along with anything
// which may follow it, for funcs json0 and jsonl
func handleTopLevel(w *bufio.Writer, jr *jsonReader) error {
	err := handleValue(w, jr)

	// end the only output-line with a line-feed; this also avoids showing
	// error messages on the same line as the main output, since JSON-0
	// output has no line-feeds before its last byte
	outErr := outputByte(w, '\n')

	if err != nil {
		return err
	}
	if outErr != nil {
		return outErr
	}
	return jr.handleTrailingJunk()
}

// handleArrayJSONL handles top-level arrays for func jsonl, emitting each
// array item on its own line, and flushing the output after each line
func handleArrayJSONL(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('['); err != nil {
		return err
	}

	for n := 0; ; n++ {
		next, comma, err := jr.seekItem()
		if err != nil {
			return err
		}

		// handle end of array
		if next == ']' {
			jr.readByte()
			if n == 0 {
				return nil
			}
			err := outputByte(w, '\n')
			w.Flush()
			return err
		}

		// turn commas between adjacent values into line-feeds, as the
		// output for this custom func is supposed to be JSON Lines
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err := outputByte(w, '\n'); err != nil {
				return err
			}
			w.Flush()
		}

		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// jsonReader reads data via a buffer, keeping track of the input position:
// this in turn allows showing much more useful errors, when these happen
type jsonReader struct {
	// r is the actual reader
	r *bufio.Reader

	// line is the 1-based line-counter for input bytes, and gives errors
	// useful position info
	line int

	// pos is the 1-based `horizontal` position in its line, and gives
	// errors useful position info
	pos int
}

// improveError makes any error more useful, by giving it info about the
// current input-position, as a 1-based line/within-line-position pair;
// errors which already have position info are returned as given, io.EOF
// is turned into an early-end-of-input error, and nil stays nil
func (jr *jsonReader) improveError(err error) error {
	if _, ok := err.(linePosError); ok {
		return err
	}

	if err == io.EOF {
		return linePosError{jr.line, jr.pos, errInputEarlyEnd}
	}
	if err != nil {
		return linePosError{jr.line, jr.pos, err}
	}
	return nil
}

// handleLeadingJunk skips anything which can come before the top-level value
func (jr *jsonReader) handleLeadingJunk() error {
	// input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
	// mark) gives no useful info if present, as UTF-8 leaves no ambiguity
	// about byte-order by design
	jr.skipUTF8BOM()

	// ignore leading whitespace and/or comments
	return jr.seekNext()
}

// handleTrailingJunk skips anything which can come after the top-level
// value, failing when there's anything else after that
func (jr *jsonReader) handleTrailingJunk() error {
	// ignore trailing whitespace and/or comments
	if err := jr.seekNext(); err != nil {
		return err
	}

	// ignore trailing semicolon
	if b, ok := jr.peekByte(); ok && b == ';' {
		jr.readByte()
		// ignore trailing whitespace and/or comments
		if err := jr.seekNext(); err != nil {
			return err
		}
	}

	// beyond trailing whitespace and/or comments, any more bytes
	// make the whole input data invalid JSON
	if _, ok := jr.peekByte(); ok {
		return jr.improveError(errExtraBytes)
	}
	return nil
}

// demandSyntax fails with an error when the next byte isn't the one given;
// when it is, the byte is then read/skipped, and a nil error is returned
func (jr *jsonReader) demandSyntax(syntax byte) error {
	chunk, err := jr.r.Peek(1)
	if err != nil {
		// func improveError turns io.EOF into an early-end-of-input error
		return jr.improveError(err)
	}

	if len(chunk) < 1 || chunk[0] != syntax {
		msg := `expected ` + string(rune(syntax))
		return jr.improveError(errors.New(msg))
	}

	jr.readByte()
	return nil
}

// updatePosInfo does what it says, given the byte just read separately
func (jr *jsonReader) updatePosInfo(r rune) {
	if r == '\n' {
		jr.line++
		jr.pos = 1
		return
	}
	jr.pos++
}

// peekByte simplifies control-flow for various other funcs: it reports the
// next byte without reading it, if there is one
func (jr *jsonReader) peekByte() (b byte, ok bool) {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return chunk[0], true
	}
	return 0, false
}

// readByte does what it says, updating the reader's position info; errors
// are returned with position info, which means they never equal io.EOF
func (jr *jsonReader) readByte() (b byte, err error) {
	b, err = jr.r.ReadByte()
	if err == nil {
		jr.updatePosInfo(rune(b))
		return b, nil
	}
	return b, jr.improveError(err)
}

// seekNext skips/seeks the next token, ignoring runs of whitespace symbols
// and comments, either single-line (starting with //) or general (starting
// with /* and ending with */)
func (jr *jsonReader) seekNext() error {
	for {
		b, ok := jr.peekByte()
		if !ok {
			return nil
		}

		// case ' ', '\t', '\f', '\v', '\r', '\n':
		if b <= 32 {
			// keep skipping whitespace bytes
			jr.readByte()
			continue
		}

		if b != '/' {
			// reached the next token
			return nil
		}

		if err := jr.skipComment(); err != nil {
			return err
		}

		// after comments, keep looking for more whitespace and/or comments
	}
}

// seekItem helps the array/object handlers find their next item: it skips
// whitespace/comments, then up to 1 comma, then more whitespace/comments;
// it returns the next byte without reading it (0 when input is over), and
// whether a comma was skipped
func (jr *jsonReader) seekItem() (next byte, comma bool, err error) {
	// there may be whitespace/comments before the next comma
	if err := jr.seekNext(); err != nil {
		return 0, false, err
	}

	next, _ = jr.peekByte()
	if next != ',' {
		return next, false, nil
	}
	jr.readByte()

	// there may be whitespace/comments before the next item, or before
	// the end of the array/object, when the comma is a trailing one
	if err := jr.seekNext(); err != nil {
		return 0, true, err
	}
	next, _ = jr.peekByte()
	return next, true, nil
}

// skipComment helps func seekNext skip over comments, simplifying the latter
// func's control-flow
func (jr *jsonReader) skipComment() error {
	if err := jr.demandSyntax('/'); err != nil {
		return err
	}

	b, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errInputEarlyEnd)
	}

	switch b {
	case '/':
		// handle single-line comments
		return jr.skipLine()

	case '*':
		// handle (potentially) multi-line comments
		return jr.skipGeneralComment()

	default:
		return jr.improveError(errInvalidComment)
	}
}

// skipLine handles single-line comments for func skipComment: these end
// either with a line-feed, or with the end of the input
func (jr *jsonReader) skipLine() error {
	for {
		// read straight from the buffer, since func readByte never returns
		// io.EOF as is, and end of input is fine in this case
		b, err := jr.r.ReadByte()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return jr.improveError(err)
		}

		jr.updatePosInfo(rune(b))
		if b == '\n' {
			return nil
		}
	}
}

// skipGeneralComment handles (potentially) multi-line comments for func
// skipComment, which calls it right before the `*` it only peeked at
func (jr *jsonReader) skipGeneralComment() error {
	// skip the leading `*`, so it can't also count as part of the ending
	// `*/`, which would make `/*/` seem like a complete comment
	if b, ok := jr.peekByte(); ok && b == '*' {
		jr.readByte()
	}

	var prev byte
	for {
		// func readByte already keeps track of lines
		b, err := jr.readByte()
		if err != nil {
			return jr.improveError(errCommentEarlyEnd)
		}

		if prev == '*' && b == '/' {
			return nil
		}
		prev = b
	}
}

// skipUTF8BOM does what it says, if a UTF-8 BOM is present; the position
// info stays the same, since a BOM isn't part of the (visible) first line
func (jr *jsonReader) skipUTF8BOM() {
	lead, err := jr.r.Peek(3)
	if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) {
		jr.r.Discard(3)
	}
}

// outputByte is a small wrapper on func WriteByte, which adapts any error
// into a custom dummy output-error, which is in turn meant to be ignored,
// being just an excuse to quit the app immediately and successfully
func outputByte(w *bufio.Writer, b byte) error {
	if err := w.WriteByte(b); err != nil {
		return errNoMoreOutput
	}
	return nil
}

// handleArray handles arrays for func handleValue
func handleArray(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('['); err != nil {
		return err
	}
	w.WriteByte('[')

	for n := 0; ; n++ {
		next, comma, err := jr.seekItem()
		if err != nil {
			return err
		}

		// handle end of array
		if next == ']' {
			jr.readByte()
			w.WriteByte(']')
			return nil
		}

		// don't forget commas between adjacent values
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handleDigits helps various number-handling funcs do their job: it copies
// a run of decimal digits, failing when there isn't even one
func handleDigits(w *bufio.Writer, jr *jsonReader) error {
	digits := 0

	for {
		b, _ := jr.peekByte()

		switch {
		case b == '_':
			// support `nice` long numbers by ignoring their underscores,
			// which don't count as digits
			jr.readByte()

		case '0' <= b && b <= '9':
			jr.readByte()
			w.WriteByte(b)
			digits++

		case digits == 0:
			return jr.improveError(errNoDigits)

		default:
			return nil
		}
	}
}

// handleExponent handles the optional exponent numbers can end with; plus
// signs are skipped, since exponents mean the same without them
func handleExponent(w *bufio.Writer, jr *jsonReader) error {
	b, _ := jr.peekByte()
	if b != 'e' && b != 'E' {
		return nil
	}

	jr.readByte()
	w.WriteByte(b)

	switch sign, _ := jr.peekByte(); sign {
	case '+':
		jr.readByte()
	case '-':
		jr.readByte()
		w.WriteByte('-')
	}
	return handleDigits(w, jr)
}

// handleDot handles pseudo-JSON numbers which start with a decimal dot,
// emitting the leading zero valid JSON needs
func handleDot(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('.'); err != nil {
		return err
	}

	w.Write([]byte{'0', '.'})
	if err := handleDigits(w, jr); err != nil {
		return err
	}
	return handleExponent(w, jr)
}

// handleKey is used by func handleObject and generalizes func handleString,
// by allowing unquoted object keys; it's not used anywhere else, as allowing
// unquoted string values is ambiguous with actual JSON-keyword values null,
// false, and true.
func handleKey(w *bufio.Writer, jr *jsonReader) error {
	first, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errStringEarlyEnd)
	}

	if first == '"' || first == '\'' {
		return handleString(w, jr)
	}

	// unquoted keys need at least 1 symbol
	if !isIdentifier[first] {
		return jr.improveError(errNoKey)
	}

	w.WriteByte('"')
	for {
		b, _ := jr.peekByte()
		if !isIdentifier[b] {
			break
		}
		jr.readByte()
		w.WriteByte(b)
	}
	w.WriteByte('"')
	return nil
}

// trySimpleInner tries to handle (more quickly) inner-strings where all bytes
// are unescaped ASCII symbols: this is a very common case for strings, and is
// almost always the case for object keys; returns whether it succeeded, so
// this func's caller knows if it needs to do anything, the slower way
func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
	chunk, _ := jr.r.Peek(64)

	for i, b := range chunk {
		if b < 32 || b > 127 || b == '\\' {
			return false
		}

		if b != quote {
			// double-quotes in single-quoted strings need escaping
			if b == '"' {
				return false
			}
			continue
		}

		// bulk-writing the chunk is this func's whole point
		w.WriteByte('"')
		w.Write(chunk[:i])
		w.WriteByte('"')

		// the bytes skipped can't have any line-feeds among them
		jr.r.Discard(i + 1)
		jr.pos += i + 1
		return true
	}

	// maybe the inner-string is ok, but it's just longer than the chunk
	return false
}

// handleKeyword is used by func handleValueDispatch to handle the JSON
// keywords, which are emitted the same as given
func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error {
	return handleKeywordAs(w, jr, string(kw), string(kw))
}

// handleKeywordAs demands the keyword given from the input, emitting the
// replacement given instead of it: this allows turning non-JSON keywords
// into their JSON equivalents
func handleKeywordAs(w *bufio.Writer, jr *jsonReader, kw, out string) error {
	for i := 0; i < len(kw); i++ {
		// peeking first makes errors point at the first wrong byte
		b, ok := jr.peekByte()
		if !ok || b != kw[i] {
			msg := `expected JSON value ` + kw
			return jr.improveError(errors.New(msg))
		}
		jr.readByte()
	}

	w.WriteString(out)
	return nil
}

// handleNegative handles numbers starting with a negative sign for func
// handleValue
func handleNegative(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('-'); err != nil {
		return err
	}

	w.WriteByte('-')
	if b, _ := jr.peekByte(); b == '.' {
		return handleDot(w, jr)
	}
	return handleNumber(w, jr)
}

// handleNumber handles numeric values/tokens which start with a digit: these
// have integer digits, optional decimals, and an optional exponent
func handleNumber(w *bufio.Writer, jr *jsonReader) error {
	// handle integer digits
	if err := handleDigits(w, jr); err != nil {
		return err
	}

	// handle optional decimal digits, starting with a leading dot
	if b, _ := jr.peekByte(); b == '.' {
		jr.readByte()
		w.WriteByte('.')
		if err := handleDigits(w, jr); err != nil {
			return err
		}
	}

	// handle optional exponent digits, even after decimals
	return handleExponent(w, jr)
}

// handleObject handles objects for func handleValue
func handleObject(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('{'); err != nil {
		return err
	}
	w.WriteByte('{')

	for npairs := 0; ; npairs++ {
		next, comma, err := jr.seekItem()
		if err != nil {
			return err
		}

		// handle end of object
		if next == '}' {
			jr.readByte()
			w.WriteByte('}')
			return nil
		}

		// don't forget commas between adjacent key-value pairs
		if npairs > 0 {
			if !comma {
				return jr.improveError(errNoObjectComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next pair's key
		if err := handleKey(w, jr); err != nil {
			return err
		}

		// demand a colon right after the key
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := jr.demandSyntax(':'); err != nil {
			return err
		}
		w.WriteByte(':')

		// handle the next pair's value
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handlePositive handles numbers starting with a positive sign for func
// handleValue
func handlePositive(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('+'); err != nil {
		return err
	}

	// valid JSON isn't supposed to have leading pluses on numbers, so
	// emit nothing for it, unlike for negative numbers

	if b, _ := jr.peekByte(); b == '.' {
		return handleDot(w, jr)
	}
	return handleNumber(w, jr)
}

// handleString handles strings for funcs handleValue and handleKey, and
// supports both single-quotes and double-quotes, always emitting the latter
// in the output, of course
func handleString(w *bufio.Writer, jr *jsonReader) error {
	q, ok := jr.peekByte()
	if !ok || (q != '"' && q != '\'') {
		return jr.improveError(errNoStringQuote)
	}
	jr.readByte()

	// try the quicker all-unescaped-ASCII handler
	if trySimpleInner(w, jr, q) {
		return nil
	}

	// it's a non-trivial inner-string, so handle it rune-by-rune
	quote := rune(q)
	w.WriteByte('"')
	escaped := false

	for {
		// read straight from the buffer, to tell apart end of input from
		// other errors, and to get the byte-count for the rune
		r, size, err := jr.r.ReadRune()
		if err == io.EOF {
			return jr.improveError(errStringEarlyEnd)
		}
		if err != nil {
			return jr.improveError(err)
		}

		// invalid UTF-8 results in 1-byte replacement runes, unlike valid
		// replacement runes, which are multi-byte
		if r == unicode.ReplacementChar && size == 1 {
			return jr.improveError(errInvalidRune)
		}
		jr.updatePosInfo(r)

		if !escaped {
			if r == '\\' {
				escaped = true
				continue
			}

			// handle end of string
			if r == quote {
				return outputByte(w, '"')
			}

			writeStringRune(w, r)
			continue
		}

		// handle escaped items
		escaped = false

		switch r {
		case 'u':
			// \u needs exactly 4 hex-digits to follow it
			w.Write([]byte{'\\', 'u'})
			if err := copyHex(w, 4, jr); err != nil {
				return jr.improveError(err)
			}

		case 'x':
			// JSON only supports 4 escaped hex-digits, so pad the 2
			// expected hex-digits with 2 zeros
			w.Write([]byte{'\\', 'u', '0', '0'})
			if err := copyHex(w, 2, jr); err != nil {
				return jr.improveError(err)
			}

		case 't', 'f', 'r', 'n', 'b', '\\', '"':
			// handle valid-JSON escaped string sequences
			w.WriteByte('\\')
			w.WriteByte(byte(r))

		case '\'':
			// escaped single-quotes aren't standard JSON, but they can
			// be handy when the input uses non-standard single-quoted
			// strings
			w.WriteByte('\'')

		default:
			// anything else is emitted as if it weren't escaped
			writeStringRune(w, r)
		}
	}
}

// writeStringRune emits a rune from an inner-string for func handleString,
// escaping it when valid JSON requires that
func writeStringRune(w *bufio.Writer, r rune) {
	if 0 <= r && r <= 127 {
		w.Write(escapedStringBytes[byte(r)])
		return
	}
	w.WriteRune(r)
}

// copyHex handles a run of hex-digits for func handleString, starting right
// after the leading `\u` (or `\x`) part; this func doesn't `improve` its
// errors with position info: that's up to the caller
func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
	for i := 0; i < n; i++ {
		// peeking first makes errors point at the first wrong byte
		chunk, err := jr.r.Peek(1)
		if err == io.EOF {
			return errStringEarlyEnd
		}
		if err != nil {
			return err
		}

		hex := matchHex[chunk[0]]
		if hex == 0 {
			return errInvalidHex
		}

		jr.readByte()
		w.WriteByte(hex)
	}

	return nil
}

// handleValue is a generic JSON-token handler, which allows the recursive
// behavior to handle any kind of JSON/pseudo-JSON input
func handleValue(w *bufio.Writer, jr *jsonReader) error {
	chunk, err := jr.r.Peek(1)
	if err != nil {
		// func improveError turns io.EOF into an early-end-of-input error
		return jr.improveError(err)
	}
	return handleValueDispatch(w, jr, chunk[0])
}

// handleValueDispatch simplifies control-flow for func handleValue, picking
// a handler from the leading byte given; python-style keywords are turned
// into their JSON equivalents
func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
	switch b {
	case 'f':
		return handleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'})
	case 'n':
		return handleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'})
	case 't':
		return handleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'})
	case 'F':
		return handleKeywordAs(w, jr, `False`, `false`)
	case 'N':
		return handleKeywordAs(w, jr, `None`, `null`)
	case 'T':
		return handleKeywordAs(w, jr, `True`, `true`)
	case '.':
		return handleDot(w, jr)
	case '+':
		return handlePositive(w, jr)
	case '-':
		return handleNegative(w, jr)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return handleNumber(w, jr)
	case '\'', '"':
		return handleString(w, jr)
	case '[':
		return handleArray(w, jr)
	case '{':
		return handleObject(w, jr)
	default:
		return jr.improveError(errInvalidToken)
	}
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = [256][]byte{
	{'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
	{'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
	{'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
	{'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
	{'\\', 'b'}, {'\\', 't'},
	{'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
	{'\\', 'f'}, {'\\', 'r'},
	{'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
	{'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
	{'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
	{'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
	{'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
	{'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
	{'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
	{'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
	{'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
	{32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
	{40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
	{48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
	{56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
	{64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
	{72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
	{80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
	{88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
	{96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
	{104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
	{112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
	{120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
	{128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
	{136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
	{144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
	{152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
	{160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
	{168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
	{176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
	{184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
	{192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
	{200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
	{208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
	{216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
	{224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
	{232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
	{240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
	{248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
}