// JsonReader.java revision eb97c0ddc063176c26065fc6855188edf0c16e03
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.util;

import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.io.Closeable;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
 * encoded value as a stream of tokens. This stream includes both literal
 * values (strings, numbers, booleans, and nulls) as well as the begin and
 * end delimiters of objects and arrays. The tokens are traversed in
 * depth-first order, the same order that they appear in the JSON document.
 * Within JSON objects, name/value pairs are represented by a single token.
 *
 * <h3>Parsing JSON</h3>
 * To create a recursive descent parser for your own JSON streams, first create
 * an entry point method that creates a {@code JsonReader}.
 *
 * <p>Next, create handler methods for each structure in your JSON text. You'll
 * need a method for each object type and for each array type.
 * <ul>
 *   <li>Within <strong>array handling</strong> methods, first call {@link
 *       #beginArray} to consume the array's opening bracket. Then create a
 *       while loop that accumulates values, terminating when {@link #hasNext}
 *       is false. Finally, read the array's closing bracket by calling {@link
 *       #endArray}.
 *   <li>Within <strong>object handling</strong> methods, first call {@link
 *       #beginObject} to consume the object's opening brace. Then create a
 *       while loop that assigns values to local variables based on their name.
 *       This loop should terminate when {@link #hasNext} is false. Finally,
 *       read the object's closing brace by calling {@link #endObject}.
 * </ul>
 * <p>When a nested object or array is encountered, delegate to the
 * corresponding handler method.
 *
 * <p>When an unknown name is encountered, strict parsers should fail with an
 * exception. Lenient parsers should call {@link #skipValue()} to recursively
 * skip the value's nested tokens, which may otherwise conflict.
 *
 * <p>If a value may be null, you should first check using {@link #peek()}.
 * Null literals can be consumed using either {@link #nextNull()} or {@link
 * #skipValue()}.
 *
 * <h3>Example</h3>
 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
 * [
 *   {
 *     "id": 912345678901,
 *     "text": "How do I read JSON on Android?",
 *     "geo": null,
 *     "user": {
 *       "name": "android_newb",
 *       "followers_count": 41
 *      }
 *   },
 *   {
 *     "id": 912345678902,
 *     "text": "@android_newb just use android.util.JsonReader!",
 *     "geo": [50.454722, -104.606667],
 *     "user": {
 *       "name": "jesse",
 *       "followers_count": 2
 *     }
 *   }
 * ]}</pre>
 * This code implements the parser for the above structure: <pre>   {@code
 *
 *   public List<Message> readJsonStream(InputStream in) throws IOException {
 *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
 *     return readMessagesArray(reader);
 *   }
 *
 *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
 *     List<Message> messages = new ArrayList<Message>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       messages.add(readMessage(reader));
 *     }
 *     reader.endArray();
 *     return messages;
 *   }
 *
 *   public Message readMessage(JsonReader reader) throws IOException {
 *     long id = -1;
 *     String text = null;
 *     User user = null;
 *     List<Double> geo = null;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("id")) {
 *         id = reader.nextLong();
 *       } else if (name.equals("text")) {
 *         text = reader.nextString();
 *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
 *         geo = readDoublesArray(reader);
 *       } else if (name.equals("user")) {
 *         user = readUser(reader);
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new Message(id, text, user, geo);
 *   }
 *
 *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
 *     List<Double> doubles = new ArrayList<Double>();
 *
 *     reader.beginArray();
 *     while (reader.hasNext()) {
 *       doubles.add(reader.nextDouble());
 *     }
 *     reader.endArray();
 *     return doubles;
 *   }
 *
 *   public User readUser(JsonReader reader) throws IOException {
 *     String username = null;
 *     int followersCount = -1;
 *
 *     reader.beginObject();
 *     while (reader.hasNext()) {
 *       String name = reader.nextName();
 *       if (name.equals("name")) {
 *         username = reader.nextString();
 *       } else if (name.equals("followers_count")) {
 *         followersCount = reader.nextInt();
 *       } else {
 *         reader.skipValue();
 *       }
 *     }
 *     reader.endObject();
 *     return new User(username, followersCount);
 *   }}</pre>
 *
 * <h3>Number Handling</h3>
 * This reader permits numeric values to be read as strings and string values to
 * be read as numbers. For example, both elements of the JSON array {@code
 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
 * This behavior is intended to prevent lossy numeric conversions: double is
 * JavaScript's only numeric type and very large values like {@code
 * 9007199254740993} cannot be represented exactly on that platform. To minimize
 * precision loss, extremely large values should be written and read as strings
 * in JSON.
 *
 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
 * of this class are not thread safe.
 */
public final class JsonReader implements Closeable {

    private static final String TRUE = "true";
    private static final String FALSE = "false";

    /** The input JSON. */
    private final Reader in;

    /** True to accept non-spec compliant JSON */
    private boolean lenient = false;

    /**
     * Use a manual buffer to easily read and unread upcoming characters, and
     * also so we can create strings without an intermediate StringBuilder.
     * We decode literals directly out of this buffer, so it must be at least as
     * long as the longest token that can be reported as a number.
     */
    private final char[] buffer = new char[1024];
    private int pos = 0;
    private int limit = 0;

    private final List<JsonScope> stack = new ArrayList<JsonScope>();
    {
        push(JsonScope.EMPTY_DOCUMENT);
    }

    /**
     * The type of the next token to be returned by {@link #peek} and {@link
     * #advance}. If null, peek() will assign a value.
     */
    private JsonToken token;

    /** The text of the next name. */
    private String name;

    /*
     * For the next literal value, we may have the text value, or the position
     * and length in the buffer.
     */
    private String value;
    private int valuePos;
    private int valueLength;

    /** True if we're currently handling a skipValue() call. */
    private boolean skipping = false;

    /**
     * Creates a new instance that reads a JSON-encoded stream from {@code in}.
     *
     * @throws NullPointerException if {@code in} is null.
     */
    public JsonReader(Reader in) {
        if (in == null) {
            throw new NullPointerException("in == null");
        }
        this.in = in;
    }

    /**
     * Configure this parser to be liberal in what it accepts. By default,
     * this parser is strict and only accepts JSON as specified by <a
     * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
     * parser to lenient causes it to ignore the following syntax errors:
     *
     * <ul>
     *   <li>End of line comments starting with {@code //} or {@code #} and
     *       ending with a newline character.
     *   <li>C-style comments starting with {@code /*} and ending with
     *       {@code *}{@code /}. Such comments may not be nested.
     *   <li>Names that are unquoted or {@code 'single quoted'}.
     *   <li>Strings that are unquoted or {@code 'single quoted'}.
     *   <li>Array elements separated by {@code ;} instead of {@code ,}.
     *   <li>Unnecessary array separators. These are interpreted as if null
     *       was the omitted value.
     *   <li>Names and values separated by {@code =} or {@code =>} instead of
     *       {@code :}.
     *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
     * </ul>
     */
    public void setLenient(boolean lenient) {
        this.lenient = lenient;
    }

    /**
     * Returns true if this parser is liberal in what it accepts.
     */
    public boolean isLenient() {
        return lenient;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new array.
     */
    public void beginArray() throws IOException {
        expect(JsonToken.BEGIN_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current array.
     */
    public void endArray() throws IOException {
        expect(JsonToken.END_ARRAY);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * beginning of a new object.
     */
    public void beginObject() throws IOException {
        expect(JsonToken.BEGIN_OBJECT);
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is the
     * end of the current object.
     */
    public void endObject() throws IOException {
        expect(JsonToken.END_OBJECT);
    }

    /**
     * Consumes {@code expected}.
     *
     * @throws IllegalStateException if the next token is not {@code expected}.
     */
    private void expect(JsonToken expected) throws IOException {
        peek();
        if (token != expected) {
            throw new IllegalStateException("Expected " + expected + " but was " + peek());
        }
        advance();
    }

    /**
     * Returns true if the current array or object has another element.
     */
    public boolean hasNext() throws IOException {
        peek();
        return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
    }

    /**
     * Returns the type of the next token without consuming it.
     *
     * @throws IllegalStateException if this reader is closed.
     */
    public JsonToken peek() throws IOException {
        if (token != null) {
            return token;
        }

        switch (peekStack()) {
            case EMPTY_DOCUMENT:
                replaceTop(JsonScope.NONEMPTY_DOCUMENT);
                JsonToken firstToken = nextValue();
                if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
                    throw new IOException(
                            "Expected JSON document to start with '[' or '{' but was " + token);
                }
                return firstToken;
            case EMPTY_ARRAY:
                return nextInArray(true);
            case NONEMPTY_ARRAY:
                return nextInArray(false);
            case EMPTY_OBJECT:
                return nextInObject(true);
            case DANGLING_NAME:
                return objectValue();
            case NONEMPTY_OBJECT:
                return nextInObject(false);
            case NONEMPTY_DOCUMENT:
                try {
                    // nextValue() also stores the token in the field; the local
                    // is named differently so it does not shadow that field.
                    JsonToken trailing = nextValue();
                    if (lenient) {
                        return trailing;
                    }
                    throw syntaxError("Expected EOF");
                } catch (EOFException e) {
                    return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
                }
            case CLOSED:
                throw new IllegalStateException("JsonReader is closed");
            default:
                throw new AssertionError();
        }
    }

    /**
     * Advances the cursor in the JSON stream to the next token. Returns the
     * token that was consumed and clears the pending token, value and name.
     */
    private JsonToken advance() throws IOException {
        peek();

        JsonToken result = token;
        token = null;
        value = null;
        name = null;
        return result;
    }

    /**
     * Returns the next token, a {@link JsonToken#NAME property name}, and
     * consumes it.
     *
     * @throws IllegalStateException if the next token in the stream is not a
     *     property name.
     */
    public String nextName() throws IOException {
        peek();
        if (token != JsonToken.NAME) {
            throw new IllegalStateException("Expected a name but was " + peek());
        }
        String result = name;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#STRING string} value of the next token,
     * consuming it. If the next token is a number, this method will return its
     * string form.
     *
     * @throws IllegalStateException if the next token is not a string or if
     *     this reader is closed.
     */
    public String nextString() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a string but was " + peek());
        }

        String result = value;
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
     * consuming it.
     *
     * @throws IllegalStateException if the next token is not a boolean or if
     *     this reader is closed.
     */
    public boolean nextBoolean() throws IOException {
        peek();
        if (token != JsonToken.BOOLEAN) {
            throw new IllegalStateException("Expected a boolean but was " + token);
        }

        // decodeLiteral() stores the TRUE/FALSE constants for boolean tokens;
        // equals() gives the same answer without relying on reference identity.
        boolean result = TRUE.equals(value);
        advance();
        return result;
    }

    /**
     * Consumes the next token from the JSON stream and asserts that it is a
     * literal null.
     *
     * @throws IllegalStateException if the next token is not null or if this
     *     reader is closed.
     */
    public void nextNull() throws IOException {
        peek();
        if (token != JsonToken.NULL) {
            throw new IllegalStateException("Expected null but was " + token);
        }

        advance();
    }

    /**
     * Returns the {@link JsonToken#NUMBER double} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a double using {@link Double#parseDouble(String)}.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     */
    public double nextDouble() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a double but was " + token);
        }

        double result = Double.parseDouble(value);
        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER long} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as a long. If the next token's numeric value cannot be exactly
     * represented by a Java {@code long}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as a long.
     */
    public long nextLong() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected a long but was " + token);
        }

        long result;
        try {
            result = Long.parseLong(value);
        } catch (NumberFormatException ignored) {
            // Not a plain integer (e.g. "1.0" or "1e3"): accept it only if the
            // double value converts to a long without loss.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (long) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Returns the {@link JsonToken#NUMBER int} value of the next token,
     * consuming it. If the next token is a string, this method will attempt to
     * parse it as an int. If the next token's numeric value cannot be exactly
     * represented by a Java {@code int}, this method throws.
     *
     * @throws IllegalStateException if the next token is not a literal value.
     * @throws NumberFormatException if the next literal value cannot be parsed
     *     as a number, or exactly represented as an int.
     */
    public int nextInt() throws IOException {
        peek();
        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
            throw new IllegalStateException("Expected an int but was " + token);
        }

        int result;
        try {
            result = Integer.parseInt(value);
        } catch (NumberFormatException ignored) {
            // Not a plain integer (e.g. "1.0" or "1e3"): accept it only if the
            // double value converts to an int without loss.
            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
            result = (int) asDouble;
            if ((double) result != asDouble) {
                throw new NumberFormatException(value);
            }
        }

        advance();
        return result;
    }

    /**
     * Closes this JSON reader and the underlying {@link Reader}. Any pending
     * token, name and value are discarded and subsequent calls to
     * {@link #peek} throw {@link IllegalStateException}.
     */
    @Override public void close() throws IOException {
        value = null;
        name = null;
        token = null;
        stack.clear();
        stack.add(JsonScope.CLOSED);
        in.close();
    }

    /**
     * Skips the next value recursively. If it is an object or array, all nested
     * elements are skipped. This method is intended for use when the JSON token
     * stream contains unrecognized or unhandled values.
     */
    public void skipValue() throws IOException {
        skipping = true;
        try {
            // Depth of arrays/objects opened since the skip began; the skip is
            // complete once every opened scope has been closed again.
            int count = 0;
            do {
                JsonToken token = advance();
                if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
                    count++;
                } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
                    count--;
                }
            } while (count != 0);
        } finally {
            skipping = false;
        }
    }

    /** Returns the scope on top of the stack without removing it. */
    private JsonScope peekStack() {
        return stack.get(stack.size() - 1);
    }

    /** Removes and returns the scope on top of the stack. */
    private JsonScope pop() {
        return stack.remove(stack.size() - 1);
    }

    /** Pushes {@code newTop} onto the scope stack. */
    private void push(JsonScope newTop) {
        stack.add(newTop);
    }

    /**
     * Replace the value on the top of the stack with the given value.
     */
    private void replaceTop(JsonScope newTop) {
        stack.set(stack.size() - 1, newTop);
    }

    /**
     * Reads the next token inside an array: either the closing bracket or the
     * next element, consuming the element separator when this is not the
     * first element.
     */
    private JsonToken nextInArray(boolean firstElement) throws IOException {
        if (firstElement) {
            replaceTop(JsonScope.NONEMPTY_ARRAY);
        } else {
            /* Look for a comma before each element after the first element. */
            switch (nextNonWhitespace()) {
                case ']':
                    pop();
                    return token = JsonToken.END_ARRAY;
                case ';':
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated array");
            }
        }

        switch (nextNonWhitespace()) {
            case ']':
                if (firstElement) {
                    pop();
                    return token = JsonToken.END_ARRAY;
                }
                // fall-through to handle ",]"
            case ';':
            case ',':
                /* In lenient mode, a 0-length literal means 'null' */
                checkLenient();
                pos--;
                value = "null";
                return token = JsonToken.NULL;
            default:
                pos--;
                return nextValue();
        }
    }

    /**
     * Reads the next token inside an object: either the closing brace or the
     * next property name, consuming the pair separator when this is not the
     * first pair.
     */
    private JsonToken nextInObject(boolean firstElement) throws IOException {
        /*
         * Read delimiters. Either a comma/semicolon separating this and the
         * previous name-value pair, or a close brace to denote the end of the
         * object.
         */
        if (firstElement) {
            /* Peek to see if this is the empty object. */
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                default:
                    pos--;
            }
        } else {
            switch (nextNonWhitespace()) {
                case '}':
                    pop();
                    return token = JsonToken.END_OBJECT;
                case ';':
                    // A semicolon separator is only permitted in lenient mode,
                    // matching the handling of ';' in arrays.
                    checkLenient(); // fall-through
                case ',':
                    break;
                default:
                    throw syntaxError("Unterminated object");
            }
        }

        /* Read the name. */
        int quote = nextNonWhitespace();
        switch (quote) {
            case '\'':
                checkLenient(); // fall-through
            case '"':
                name = nextString((char) quote);
                break;
            default:
                checkLenient();
                pos--;
                name = nextLiteral(false);
                if (name.isEmpty()) {
                    throw syntaxError("Expected name");
                }
        }

        replaceTop(JsonScope.DANGLING_NAME);
        return token = JsonToken.NAME;
    }

    /**
     * Consumes the name/value separator that follows a property name and
     * reads the property's value.
     */
    private JsonToken objectValue() throws IOException {
        /*
         * Read the name/value separator. Usually a colon ':'. In lenient mode
         * we also accept an equals sign '=', or an arrow "=>".
         */
        switch (nextNonWhitespace()) {
            case ':':
                break;
            case '=':
                checkLenient();
                if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
                    pos++;
                }
                break;
            default:
                throw syntaxError("Expected ':'");
        }

        replaceTop(JsonScope.NONEMPTY_OBJECT);
        return nextValue();
    }

    /**
     * Reads the start of the next value: an opening brace or bracket, a quoted
     * string, or an unquoted literal.
     */
    private JsonToken nextValue() throws IOException {
        int c = nextNonWhitespace();
        switch (c) {
            case '{':
                push(JsonScope.EMPTY_OBJECT);
                return token = JsonToken.BEGIN_OBJECT;

            case '[':
                push(JsonScope.EMPTY_ARRAY);
                return token = JsonToken.BEGIN_ARRAY;

            case '\'':
                checkLenient(); // fall-through
            case '"':
                value = nextString((char) c);
                return token = JsonToken.STRING;

            default:
                pos--;
                return readLiteral();
        }
    }

    /**
     * Returns true once {@code limit - pos >= minimum}. If the data is
     * exhausted before that many characters are available, this returns
     * false. Unread characters are moved to the start of the buffer, so
     * {@code pos} is 0 when this method returns.
     */
    private boolean fillBuffer(int minimum) throws IOException {
        if (limit != pos) {
            limit -= pos;
            System.arraycopy(buffer, pos, buffer, 0, limit);
        } else {
            limit = 0;
        }

        pos = 0;
        int total;
        while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
            limit += total;
            if (limit >= minimum) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the next character that is neither whitespace nor part of a
     * comment, consuming it. Comments are only permitted in lenient mode.
     *
     * @throws EOFException if the input is exhausted first.
     */
    private int nextNonWhitespace() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            int c = buffer[pos++];
            switch (c) {
                case '\t':
                case ' ':
                case '\n':
                case '\r':
                    continue;

                case '/':
                    if (pos == limit && !fillBuffer(1)) {
                        return c;
                    }

                    checkLenient();
                    char peek = buffer[pos];
                    switch (peek) {
                        case '*':
                            // skip a /* c-style comment */
                            pos++;
                            if (!skipTo("*/")) {
                                throw syntaxError("Unterminated comment");
                            }
                            // skipTo leaves pos at the terminator; step over it
                            pos += 2;
                            continue;

                        case '/':
                            // skip a // end-of-line comment
                            pos++;
                            skipToEndOfLine();
                            continue;

                        default:
                            return c;
                    }

                case '#':
                    /*
                     * Skip a # hash end-of-line comment. The JSON RFC doesn't
                     * specify this behaviour, but it's required to parse
                     * existing documents. See http://b/2571423.
                     */
                    checkLenient();
                    skipToEndOfLine();
                    continue;

                default:
                    return c;
            }
        }

        throw new EOFException("End of input");
    }

    /**
     * Throws a syntax error unless this reader is lenient.
     */
    private void checkLenient() throws IOException {
        if (!lenient) {
            throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
        }
    }

    /**
     * Advances the position until after the next newline character. If the line
     * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
     * caller.
     */
    private void skipToEndOfLine() throws IOException {
        while (pos < limit || fillBuffer(1)) {
            char c = buffer[pos++];
            if (c == '\r' || c == '\n') {
                break;
            }
        }
    }

    /**
     * Advances {@code pos} to the start of the next occurrence of
     * {@code toFind}. Returns false if the input is exhausted before
     * {@code toFind} is found.
     */
    private boolean skipTo(String toFind) throws IOException {
        outer:
        // '<=' rather than '<': when exactly toFind.length() characters remain
        // they are already buffered and must be compared. Refilling at that
        // point would fail at end of input and wrongly report that toFind is
        // missing.
        for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
            for (int c = 0; c < toFind.length(); c++) {
                if (buffer[pos + c] != toFind.charAt(c)) {
                    continue outer;
                }
            }
            return true;
        }
        return false;
    }

    /**
     * Returns the string up to but not including {@code quote}, unescaping any
     * character escape sequences encountered along the way. The opening quote
     * should have already been read. This consumes the closing quote, but does
     * not include it in the returned string.
     *
     * @param quote either ' or ".
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private String nextString(char quote) throws IOException {
        StringBuilder builder = null;
        do {
            /* the index of the first character not yet appended to the builder. */
            int start = pos;
            while (pos < limit) {
                int c = buffer[pos++];

                if (c == quote) {
                    if (skipping) {
                        return "skipped!";
                    } else if (builder == null) {
                        return new String(buffer, start, pos - start - 1);
                    } else {
                        builder.append(buffer, start, pos - start - 1);
                        return builder.toString();
                    }

                } else if (c == '\\') {
                    if (builder == null) {
                        builder = new StringBuilder();
                    }
                    builder.append(buffer, start, pos - start - 1);
                    builder.append(readEscapeCharacter());
                    start = pos;
                }
            }

            // The buffer ran out before the closing quote: save what was read
            // so far, because fillBuffer() overwrites consumed characters.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, start, pos - start);
        } while (fillBuffer(1));

        throw syntaxError("Unterminated string");
    }

    /**
     * Reads the value up to but not including any delimiter characters. This
     * does not consume the delimiter character.
     *
     * @param assignOffsetsOnly true for this method to only set the valuePos
     *     and valueLength fields and return a null result. This only works if
     *     the literal is short; a string is returned otherwise.
     */
    private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
        StringBuilder builder = null;
        valuePos = -1;
        valueLength = 0;
        int i = 0;

        findNonLiteralCharacter:
        while (true) {
            for (; pos + i < limit; i++) {
                switch (buffer[pos + i]) {
                    case '/':
                    case '\\':
                    case ';':
                    case '#':
                    case '=':
                        checkLenient(); // fall-through
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case ':':
                    case ',':
                    case ' ':
                    case '\t':
                    case '\f':
                    case '\r':
                    case '\n':
                        break findNonLiteralCharacter;
                }
            }

            /*
             * Attempt to load the entire literal into the buffer at once. If
             * we run out of input, add a non-literal character at the end so
             * that decoding doesn't need to do bounds checks.
             */
            if (i < buffer.length) {
                if (fillBuffer(i + 1)) {
                    continue;
                } else {
                    buffer[limit] = '\0';
                    break;
                }
            }

            // use a StringBuilder when the value is too long. It must be an unquoted string.
            if (builder == null) {
                builder = new StringBuilder();
            }
            builder.append(buffer, pos, i);
            valueLength += i;
            pos += i;
            i = 0;
            if (!fillBuffer(1)) {
                break;
            }
        }

        String result;
        if (assignOffsetsOnly && builder == null) {
            valuePos = pos;
            result = null;
        } else if (skipping) {
            result = "skipped!";
        } else if (builder == null) {
            result = new String(buffer, pos, i);
        } else {
            builder.append(buffer, pos, i);
            result = builder.toString();
        }
        valueLength += i;
        pos += i;
        return result;
    }

    @Override public String toString() {
        return getClass().getSimpleName() + " near " + getSnippet();
    }

    /**
     * Unescapes the character identified by the character or characters that
     * immediately follow a backslash. The backslash '\' should have already
     * been read. This supports both unicode escapes "u000A" and two-character
     * escapes "\n".
     *
     * @throws NumberFormatException if any unicode escape sequences are
     *     malformed.
     */
    private char readEscapeCharacter() throws IOException {
        if (pos == limit && !fillBuffer(1)) {
            throw syntaxError("Unterminated escape sequence");
        }

        char escaped = buffer[pos++];
        switch (escaped) {
            case 'u':
                if (pos + 4 > limit && !fillBuffer(4)) {
                    throw syntaxError("Unterminated escape sequence");
                }
                String hex = new String(buffer, pos, 4);
                pos += 4;
                return (char) Integer.parseInt(hex, 16);

            case 't':
                return '\t';

            case 'b':
                return '\b';

            case 'n':
                return '\n';

            case 'r':
                return '\r';

            case 'f':
                return '\f';

            case '\'':
            case '"':
            case '\\':
            default:
                return escaped;
        }
    }

    /**
     * Reads a null, boolean, numeric or unquoted string literal value.
     */
    private JsonToken readLiteral() throws IOException {
        value = nextLiteral(true);
        if (valueLength == 0) {
            throw syntaxError("Expected literal value");
        }
        token = decodeLiteral();
        if (token == JsonToken.STRING) {
            // an unquoted string is only permitted in lenient mode
            checkLenient();
        }
        return token;
    }

    /**
     * Returns the token type of the literal most recently read by
     * {@link #nextLiteral}, and assigns {@code value} for literals that were
     * left in the buffer. The words null, true and false are matched without
     * regard to case.
     */
    private JsonToken decodeLiteral() throws IOException {
        if (valuePos == -1) {
            // it was too long to fit in the buffer so it can only be a string
            return JsonToken.STRING;
        } else if (valueLength == 4
                && ('n' == buffer[valuePos    ] || 'N' == buffer[valuePos    ])
                && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
            value = "null";
            return JsonToken.NULL;
        } else if (valueLength == 4
                && ('t' == buffer[valuePos    ] || 'T' == buffer[valuePos    ])
                && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
                && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
                && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
            value = TRUE;
            return JsonToken.BOOLEAN;
        } else if (valueLength == 5
                && ('f' == buffer[valuePos    ] || 'F' == buffer[valuePos    ])
                && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
                && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
                && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
            value = FALSE;
            return JsonToken.BOOLEAN;
        } else {
            value = new String(buffer, valuePos, valueLength);
            return decodeNumber(buffer, valuePos, valueLength);
        }
    }

    /**
     * Determine whether the characters is a JSON number. Numbers are of the
     * form -12.34e+56. Fractional and exponential parts are optional. Leading
     * zeroes are not allowed in the value or exponential part, but are allowed
     * in the fraction.
     *
     * <p>This reads one character past the end of the literal, so the
     * character at {@code offset + length} must exist in {@code chars}.
     */
    private JsonToken decodeNumber(char[] chars, int offset, int length) {
        int i = offset;
        int c = chars[i];

        if (c == '-') {
            c = chars[++i];
        }

        if (c == '0') {
            c = chars[++i];
        } else if (c >= '1' && c <= '9') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        } else {
            return JsonToken.STRING;
        }

        if (c == '.') {
            c = chars[++i];
            while (c >= '0' && c <= '9') {
                c = chars[++i];
            }
        }

        if (c == 'e' || c == 'E') {
            c = chars[++i];
            if (c == '+' || c == '-') {
                c = chars[++i];
            }
            if (c >= '0' && c <= '9') {
                c = chars[++i];
                while (c >= '0' && c <= '9') {
                    c = chars[++i];
                }
            } else {
                return JsonToken.STRING;
            }
        }

        // It is a number only if the grammar consumed the entire literal.
        if (i == offset + length) {
            return JsonToken.NUMBER;
        } else {
            return JsonToken.STRING;
        }
    }

    /**
     * Throws a new IO exception with the given message and a context snippet
     * with this reader's content.
     */
    private IOException syntaxError(String message) throws IOException {
        throw new MalformedJsonException(message + " near " + getSnippet());
    }

    /**
     * Returns up to 20 buffered characters on either side of the current
     * position, for use in error messages.
     */
    private CharSequence getSnippet() {
        StringBuilder snippet = new StringBuilder();
        int beforePos = Math.min(pos, 20);
        snippet.append(buffer, pos - beforePos, beforePos);
        int afterPos = Math.min(limit - pos, 20);
        snippet.append(buffer, pos, afterPos);
        return snippet;
    }
}