JsonReader.java revision eb97c0ddc063176c26065fc6855188edf0c16e03
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.util;
18
19import java.io.EOFException;
20import java.io.IOException;
21import java.io.Reader;
22import java.io.Closeable;
23import java.util.ArrayList;
24import java.util.List;
25
26/**
27 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
28 * encoded value as a stream of tokens. This stream includes both literal
29 * values (strings, numbers, booleans, and nulls) as well as the begin and
30 * end delimiters of objects and arrays. The tokens are traversed in
31 * depth-first order, the same order that they appear in the JSON document.
32 * Within JSON objects, name/value pairs are represented by a single token.
33 *
34 * <h3>Parsing JSON</h3>
35 * To create a recursive descent parser for your own JSON streams, first create
36 * an entry point method that creates a {@code JsonReader}.
37 *
38 * <p>Next, create handler methods for each structure in your JSON text. You'll
39 * need a method for each object type and for each array type.
40 * <ul>
41 *   <li>Within <strong>array handling</strong> methods, first call {@link
42 *       #beginArray} to consume the array's opening bracket. Then create a
43 *       while loop that accumulates values, terminating when {@link #hasNext}
44 *       is false. Finally, read the array's closing bracket by calling {@link
45 *       #endArray}.
46 *   <li>Within <strong>object handling</strong> methods, first call {@link
47 *       #beginObject} to consume the object's opening brace. Then create a
48 *       while loop that assigns values to local variables based on their name.
49 *       This loop should terminate when {@link #hasNext} is false. Finally,
50 *       read the object's closing brace by calling {@link #endObject}.
51 * </ul>
52 * <p>When a nested object or array is encountered, delegate to the
53 * corresponding handler method.
54 *
55 * <p>When an unknown name is encountered, strict parsers should fail with an
56 * exception. Lenient parsers should call {@link #skipValue()} to recursively
57 * skip the value's nested tokens, which may otherwise conflict.
58 *
59 * <p>If a value may be null, you should first check using {@link #peek()}.
60 * Null literals can be consumed using either {@link #nextNull()} or {@link
61 * #skipValue()}.
62 *
63 * <h3>Example</h3>
64 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
65 * [
66 *   {
67 *     "id": 912345678901,
68 *     "text": "How do I read JSON on Android?",
69 *     "geo": null,
70 *     "user": {
71 *       "name": "android_newb",
72 *       "followers_count": 41
73 *      }
74 *   },
75 *   {
76 *     "id": 912345678902,
77 *     "text": "@android_newb just use android.util.JsonReader!",
78 *     "geo": [50.454722, -104.606667],
79 *     "user": {
80 *       "name": "jesse",
81 *       "followers_count": 2
82 *     }
83 *   }
84 * ]}</pre>
85 * This code implements the parser for the above structure: <pre>   {@code
86 *
87 *   public List<Message> readJsonStream(InputStream in) throws IOException {
88 *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
89 *     return readMessagesArray(reader);
90 *   }
91 *
92 *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
93 *     List<Message> messages = new ArrayList<Message>();
94 *
95 *     reader.beginArray();
96 *     while (reader.hasNext()) {
97 *       messages.add(readMessage(reader));
98 *     }
99 *     reader.endArray();
100 *     return messages;
101 *   }
102 *
103 *   public Message readMessage(JsonReader reader) throws IOException {
104 *     long id = -1;
105 *     String text = null;
106 *     User user = null;
107 *     List<Double> geo = null;
108 *
109 *     reader.beginObject();
110 *     while (reader.hasNext()) {
111 *       String name = reader.nextName();
112 *       if (name.equals("id")) {
113 *         id = reader.nextLong();
114 *       } else if (name.equals("text")) {
115 *         text = reader.nextString();
116 *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
117 *         geo = readDoublesArray(reader);
118 *       } else if (name.equals("user")) {
119 *         user = readUser(reader);
120 *       } else {
121 *         reader.skipValue();
122 *       }
123 *     }
124 *     reader.endObject();
125 *     return new Message(id, text, user, geo);
126 *   }
127 *
128 *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
129 *     List<Double> doubles = new ArrayList<Double>();
130 *
131 *     reader.beginArray();
132 *     while (reader.hasNext()) {
133 *       doubles.add(reader.nextDouble());
134 *     }
135 *     reader.endArray();
136 *     return doubles;
137 *   }
138 *
139 *   public User readUser(JsonReader reader) throws IOException {
140 *     String username = null;
141 *     int followersCount = -1;
142 *
143 *     reader.beginObject();
144 *     while (reader.hasNext()) {
145 *       String name = reader.nextName();
146 *       if (name.equals("name")) {
147 *         username = reader.nextString();
148 *       } else if (name.equals("followers_count")) {
149 *         followersCount = reader.nextInt();
150 *       } else {
151 *         reader.skipValue();
152 *       }
153 *     }
154 *     reader.endObject();
155 *     return new User(username, followersCount);
156 *   }}</pre>
157 *
158 * <h3>Number Handling</h3>
159 * This reader permits numeric values to be read as strings and string values to
160 * be read as numbers. For example, both elements of the JSON array {@code
161 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
162 * This behavior is intended to prevent lossy numeric conversions: double is
163 * JavaScript's only numeric type and very large values like {@code
164 * 9007199254740993} cannot be represented exactly on that platform. To minimize
165 * precision loss, extremely large values should be written and read as strings
166 * in JSON.
167 *
168 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
169 * of this class are not thread safe.
170 */
171public final class JsonReader implements Closeable {
172
173    private static final String TRUE = "true";
174    private static final String FALSE = "false";
175
176    /** The input JSON. */
177    private final Reader in;
178
179    /** True to accept non-spec compliant JSON */
180    private boolean lenient = false;
181
182    /**
183     * Use a manual buffer to easily read and unread upcoming characters, and
184     * also so we can create strings without an intermediate StringBuilder.
185     * We decode literals directly out of this buffer, so it must be at least as
186     * long as the longest token that can be reported as a number.
187     */
188    private final char[] buffer = new char[1024];
189    private int pos = 0;
190    private int limit = 0;
191
192    private final List<JsonScope> stack = new ArrayList<JsonScope>();
193    {
194        push(JsonScope.EMPTY_DOCUMENT);
195    }
196
197    /**
198     * The type of the next token to be returned by {@link #peek} and {@link
199     * #advance}. If null, peek() will assign a value.
200     */
201    private JsonToken token;
202
203    /** The text of the next name. */
204    private String name;
205
206    /*
207     * For the next literal value, we may have the text value, or the position
208     * and length in the buffer.
209     */
210    private String value;
211    private int valuePos;
212    private int valueLength;
213
214    /** True if we're currently handling a skipValue() call. */
215    private boolean skipping = false;
216
217    /**
218     * Creates a new instance that reads a JSON-encoded stream from {@code in}.
219     */
220    public JsonReader(Reader in) {
221        if (in == null) {
222            throw new NullPointerException("in == null");
223        }
224        this.in = in;
225    }
226
227    /**
228     * Configure this parser to be  be liberal in what it accepts. By default,
229     * this parser is strict and only accepts JSON as specified by <a
230     * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
231     * parser to lenient causes it to ignore the following syntax errors:
232     *
233     * <ul>
234     *   <li>End of line comments starting with {@code //} or {@code #} and
235     *       ending with a newline character.
236     *   <li>C-style comments starting with {@code /*} and ending with
237     *       {@code *}{@code /}. Such comments may not be nested.
238     *   <li>Names that are unquoted or {@code 'single quoted'}.
239     *   <li>Strings that are unquoted or {@code 'single quoted'}.
240     *   <li>Array elements separated by {@code ;} instead of {@code ,}.
241     *   <li>Unnecessary array separators. These are interpreted as if null
242     *       was the omitted value.
243     *   <li>Names and values separated by {@code =} or {@code =>} instead of
244     *       {@code :}.
245     *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
246     * </ul>
247     */
248    public void setLenient(boolean lenient) {
249        this.lenient = lenient;
250    }
251
252    /**
253     * Returns true if this parser is liberal in what it accepts.
254     */
255    public boolean isLenient() {
256        return lenient;
257    }
258
259    /**
260     * Consumes the next token from the JSON stream and asserts that it is the
261     * beginning of a new array.
262     */
263    public void beginArray() throws IOException {
264        expect(JsonToken.BEGIN_ARRAY);
265    }
266
267    /**
268     * Consumes the next token from the JSON stream and asserts that it is the
269     * end of the current array.
270     */
271    public void endArray() throws IOException {
272        expect(JsonToken.END_ARRAY);
273    }
274
275    /**
276     * Consumes the next token from the JSON stream and asserts that it is the
277     * beginning of a new object.
278     */
279    public void beginObject() throws IOException {
280        expect(JsonToken.BEGIN_OBJECT);
281    }
282
283    /**
284     * Consumes the next token from the JSON stream and asserts that it is the
285     * end of the current array.
286     */
287    public void endObject() throws IOException {
288        expect(JsonToken.END_OBJECT);
289    }
290
291    /**
292     * Consumes {@code expected}.
293     */
294    private void expect(JsonToken expected) throws IOException {
295        peek();
296        if (token != expected) {
297            throw new IllegalStateException("Expected " + expected + " but was " + peek());
298        }
299        advance();
300    }
301
302    /**
303     * Returns true if the current array or object has another element.
304     */
305    public boolean hasNext() throws IOException {
306        peek();
307        return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
308    }
309
310    /**
311     * Returns the type of the next token without consuming it.
312     */
313    public JsonToken peek() throws IOException {
314        if (token != null) {
315          return token;
316        }
317
318        switch (peekStack()) {
319            case EMPTY_DOCUMENT:
320                replaceTop(JsonScope.NONEMPTY_DOCUMENT);
321                JsonToken firstToken = nextValue();
322                if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
323                    throw new IOException(
324                            "Expected JSON document to start with '[' or '{' but was " + token);
325                }
326                return firstToken;
327            case EMPTY_ARRAY:
328                return nextInArray(true);
329            case NONEMPTY_ARRAY:
330                return nextInArray(false);
331            case EMPTY_OBJECT:
332                return nextInObject(true);
333            case DANGLING_NAME:
334                return objectValue();
335            case NONEMPTY_OBJECT:
336                return nextInObject(false);
337            case NONEMPTY_DOCUMENT:
338                try {
339                    JsonToken token = nextValue();
340                    if (lenient) {
341                        return token;
342                    }
343                    throw syntaxError("Expected EOF");
344                } catch (EOFException e) {
345                    return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
346                }
347            case CLOSED:
348                throw new IllegalStateException("JsonReader is closed");
349            default:
350                throw new AssertionError();
351        }
352    }
353
354    /**
355     * Advances the cursor in the JSON stream to the next token.
356     */
357    private JsonToken advance() throws IOException {
358        peek();
359
360        JsonToken result = token;
361        token = null;
362        value = null;
363        name = null;
364        return result;
365    }
366
367    /**
368     * Returns the next token, a {@link JsonToken#NAME property name}, and
369     * consumes it.
370     *
371     * @throws IOException if the next token in the stream is not a property
372     *     name.
373     */
374    public String nextName() throws IOException {
375        peek();
376        if (token != JsonToken.NAME) {
377            throw new IllegalStateException("Expected a name but was " + peek());
378        }
379        String result = name;
380        advance();
381        return result;
382    }
383
384    /**
385     * Returns the {@link JsonToken#STRING string} value of the next token,
386     * consuming it. If the next token is a number, this method will return its
387     * string form.
388     *
389     * @throws IllegalStateException if the next token is not a string or if
390     *     this reader is closed.
391     */
392    public String nextString() throws IOException {
393        peek();
394        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
395            throw new IllegalStateException("Expected a string but was " + peek());
396        }
397
398        String result = value;
399        advance();
400        return result;
401    }
402
403    /**
404     * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
405     * consuming it.
406     *
407     * @throws IllegalStateException if the next token is not a boolean or if
408     *     this reader is closed.
409     */
410    public boolean nextBoolean() throws IOException {
411        peek();
412        if (token != JsonToken.BOOLEAN) {
413            throw new IllegalStateException("Expected a boolean but was " + token);
414        }
415
416        boolean result = (value == TRUE);
417        advance();
418        return result;
419    }
420
421    /**
422     * Consumes the next token from the JSON stream and asserts that it is a
423     * literal null.
424     *
425     * @throws IllegalStateException if the next token is not null or if this
426     *     reader is closed.
427     */
428    public void nextNull() throws IOException {
429        peek();
430        if (token != JsonToken.NULL) {
431            throw new IllegalStateException("Expected null but was " + token);
432        }
433
434        advance();
435    }
436
437    /**
438     * Returns the {@link JsonToken#NUMBER double} value of the next token,
439     * consuming it. If the next token is a string, this method will attempt to
440     * parse it as a double using {@link Double#parseDouble(String)}.
441     *
442     * @throws IllegalStateException if the next token is not a literal value.
443     */
444    public double nextDouble() throws IOException {
445        peek();
446        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
447            throw new IllegalStateException("Expected a double but was " + token);
448        }
449
450        double result = Double.parseDouble(value);
451        advance();
452        return result;
453    }
454
455    /**
456     * Returns the {@link JsonToken#NUMBER long} value of the next token,
457     * consuming it. If the next token is a string, this method will attempt to
458     * parse it as a long. If the next token's numeric value cannot be exactly
459     * represented by a Java {@code long}, this method throws.
460     *
461     * @throws IllegalStateException if the next token is not a literal value.
462     * @throws NumberFormatException if the next literal value cannot be parsed
463     *     as a number, or exactly represented as a long.
464     */
465    public long nextLong() throws IOException {
466        peek();
467        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
468            throw new IllegalStateException("Expected a long but was " + token);
469        }
470
471        long result;
472        try {
473            result = Long.parseLong(value);
474        } catch (NumberFormatException ignored) {
475            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
476            result = (long) asDouble;
477            if ((double) result != asDouble) {
478                throw new NumberFormatException(value);
479            }
480        }
481
482        advance();
483        return result;
484    }
485
486    /**
487     * Returns the {@link JsonToken#NUMBER int} value of the next token,
488     * consuming it. If the next token is a string, this method will attempt to
489     * parse it as an int. If the next token's numeric value cannot be exactly
490     * represented by a Java {@code int}, this method throws.
491     *
492     * @throws IllegalStateException if the next token is not a literal value.
493     * @throws NumberFormatException if the next literal value cannot be parsed
494     *     as a number, or exactly represented as an int.
495     */
496    public int nextInt() throws IOException {
497        peek();
498        if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
499            throw new IllegalStateException("Expected an int but was " + token);
500        }
501
502        int result;
503        try {
504            result = Integer.parseInt(value);
505        } catch (NumberFormatException ignored) {
506            double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
507            result = (int) asDouble;
508            if ((double) result != asDouble) {
509                throw new NumberFormatException(value);
510            }
511        }
512
513        advance();
514        return result;
515    }
516
517    /**
518     * Closes this JSON reader and the underlying {@link Reader}.
519     */
520    public void close() throws IOException {
521        value = null;
522        token = null;
523        stack.clear();
524        stack.add(JsonScope.CLOSED);
525        in.close();
526    }
527
528    /**
529     * Skips the next value recursively. If it is an object or array, all nested
530     * elements are skipped. This method is intended for use when the JSON token
531     * stream contains unrecognized or unhandled values.
532     */
533    public void skipValue() throws IOException {
534        skipping = true;
535        try {
536            int count = 0;
537            do {
538                JsonToken token = advance();
539                if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
540                    count++;
541                } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
542                    count--;
543                }
544            } while (count != 0);
545        } finally {
546            skipping = false;
547        }
548    }
549
550    private JsonScope peekStack() {
551        return stack.get(stack.size() - 1);
552    }
553
554    private JsonScope pop() {
555        return stack.remove(stack.size() - 1);
556    }
557
558    private void push(JsonScope newTop) {
559        stack.add(newTop);
560    }
561
562    /**
563     * Replace the value on the top of the stack with the given value.
564     */
565    private void replaceTop(JsonScope newTop) {
566        stack.set(stack.size() - 1, newTop);
567    }
568
569    private JsonToken nextInArray(boolean firstElement) throws IOException {
570        if (firstElement) {
571            replaceTop(JsonScope.NONEMPTY_ARRAY);
572        } else {
573            /* Look for a comma before each element after the first element. */
574            switch (nextNonWhitespace()) {
575                case ']':
576                    pop();
577                    return token = JsonToken.END_ARRAY;
578                case ';':
579                    checkLenient(); // fall-through
580                case ',':
581                    break;
582                default:
583                    throw syntaxError("Unterminated array");
584            }
585        }
586
587        switch (nextNonWhitespace()) {
588            case ']':
589                if (firstElement) {
590                    pop();
591                    return token = JsonToken.END_ARRAY;
592                }
593                // fall-through to handle ",]"
594            case ';':
595            case ',':
596                /* In lenient mode, a 0-length literal means 'null' */
597                checkLenient();
598                pos--;
599                value = "null";
600                return token = JsonToken.NULL;
601            default:
602                pos--;
603                return nextValue();
604        }
605    }
606
607    private JsonToken nextInObject(boolean firstElement) throws IOException {
608        /*
609         * Read delimiters. Either a comma/semicolon separating this and the
610         * previous name-value pair, or a close brace to denote the end of the
611         * object.
612         */
613        if (firstElement) {
614            /* Peek to see if this is the empty object. */
615            switch (nextNonWhitespace()) {
616                case '}':
617                    pop();
618                    return token = JsonToken.END_OBJECT;
619                default:
620                    pos--;
621            }
622        } else {
623            switch (nextNonWhitespace()) {
624                case '}':
625                    pop();
626                    return token = JsonToken.END_OBJECT;
627                case ';':
628                case ',':
629                    break;
630                default:
631                    throw syntaxError("Unterminated object");
632            }
633        }
634
635        /* Read the name. */
636        int quote = nextNonWhitespace();
637        switch (quote) {
638            case '\'':
639                checkLenient(); // fall-through
640            case '"':
641                name = nextString((char) quote);
642                break;
643            default:
644                checkLenient();
645                pos--;
646                name = nextLiteral(false);
647                if (name.isEmpty()) {
648                    throw syntaxError("Expected name");
649                }
650        }
651
652        replaceTop(JsonScope.DANGLING_NAME);
653        return token = JsonToken.NAME;
654    }
655
656    private JsonToken objectValue() throws IOException {
657        /*
658         * Read the name/value separator. Usually a colon ':'. In lenient mode
659         * we also accept an equals sign '=', or an arrow "=>".
660         */
661        switch (nextNonWhitespace()) {
662            case ':':
663                break;
664            case '=':
665                checkLenient();
666                if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
667                    pos++;
668                }
669                break;
670            default:
671                throw syntaxError("Expected ':'");
672        }
673
674        replaceTop(JsonScope.NONEMPTY_OBJECT);
675        return nextValue();
676    }
677
678    private JsonToken nextValue() throws IOException {
679        int c = nextNonWhitespace();
680        switch (c) {
681            case '{':
682                push(JsonScope.EMPTY_OBJECT);
683                return token = JsonToken.BEGIN_OBJECT;
684
685            case '[':
686                push(JsonScope.EMPTY_ARRAY);
687                return token = JsonToken.BEGIN_ARRAY;
688
689            case '\'':
690                checkLenient(); // fall-through
691            case '"':
692                value = nextString((char) c);
693                return token = JsonToken.STRING;
694
695            default:
696                pos--;
697                return readLiteral();
698        }
699    }
700
701    /**
702     * Returns true once {@code limit - pos >= minimum}. If the data is
703     * exhausted before that many characters are available, this returns
704     * false.
705     */
706    private boolean fillBuffer(int minimum) throws IOException {
707        if (limit != pos) {
708            limit -= pos;
709            System.arraycopy(buffer, pos, buffer, 0, limit);
710        } else {
711            limit = 0;
712        }
713
714        pos = 0;
715        int total;
716        while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
717            limit += total;
718            if (limit >= minimum) {
719                return true;
720            }
721        }
722        return false;
723    }
724
725    private int nextNonWhitespace() throws IOException {
726        while (pos < limit || fillBuffer(1)) {
727            int c = buffer[pos++];
728            switch (c) {
729                case '\t':
730                case ' ':
731                case '\n':
732                case '\r':
733                    continue;
734
735                case '/':
736                    if (pos == limit && !fillBuffer(1)) {
737                        return c;
738                    }
739
740                    checkLenient();
741                    char peek = buffer[pos];
742                    switch (peek) {
743                        case '*':
744                            // skip a /* c-style comment */
745                            pos++;
746                            if (!skipTo("*/")) {
747                                throw syntaxError("Unterminated comment");
748                            }
749                            pos += 2;
750                            continue;
751
752                        case '/':
753                            // skip a // end-of-line comment
754                            pos++;
755                            skipToEndOfLine();
756                            continue;
757
758                        default:
759                            return c;
760                    }
761
762                case '#':
763                    /*
764                     * Skip a # hash end-of-line comment. The JSON RFC doesn't
765                     * specify this behaviour, but it's required to parse
766                     * existing documents. See http://b/2571423.
767                     */
768                    checkLenient();
769                    skipToEndOfLine();
770                    continue;
771
772                default:
773                    return c;
774            }
775        }
776
777        throw new EOFException("End of input");
778    }
779
780    private void checkLenient() throws IOException {
781        if (!lenient) {
782            throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
783        }
784    }
785
786    /**
787     * Advances the position until after the next newline character. If the line
788     * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
789     * caller.
790     */
791    private void skipToEndOfLine() throws IOException {
792        while (pos < limit || fillBuffer(1)) {
793            char c = buffer[pos++];
794            if (c == '\r' || c == '\n') {
795                break;
796            }
797        }
798    }
799
800    private boolean skipTo(String toFind) throws IOException {
801        outer:
802        for (; pos + toFind.length() < limit || fillBuffer(toFind.length()); pos++) {
803            for (int c = 0; c < toFind.length(); c++) {
804                if (buffer[pos + c] != toFind.charAt(c)) {
805                    continue outer;
806                }
807            }
808            return true;
809        }
810        return false;
811    }
812
813    /**
814     * Returns the string up to but not including {@code quote}, unescaping any
815     * character escape sequences encountered along the way. The opening quote
816     * should have already been read. This consumes the closing quote, but does
817     * not include it in the returned string.
818     *
819     * @param quote either ' or ".
820     * @throws NumberFormatException if any unicode escape sequences are
821     *     malformed.
822     */
823    private String nextString(char quote) throws IOException {
824        StringBuilder builder = null;
825        do {
826            /* the index of the first character not yet appended to the builder. */
827            int start = pos;
828            while (pos < limit) {
829                int c = buffer[pos++];
830
831                if (c == quote) {
832                    if (skipping) {
833                        return "skipped!";
834                    } else if (builder == null) {
835                        return new String(buffer, start, pos - start - 1);
836                    } else {
837                        builder.append(buffer, start, pos - start - 1);
838                        return builder.toString();
839                    }
840
841                } else if (c == '\\') {
842                    if (builder == null) {
843                        builder = new StringBuilder();
844                    }
845                    builder.append(buffer, start, pos - start - 1);
846                    builder.append(readEscapeCharacter());
847                    start = pos;
848                }
849            }
850
851            if (builder == null) {
852                builder = new StringBuilder();
853            }
854            builder.append(buffer, start, pos - start);
855        } while (fillBuffer(1));
856
857        throw syntaxError("Unterminated string");
858    }
859
860    /**
861     * Reads the value up to but not including any delimiter characters. This
862     * does not consume the delimiter character.
863     *
864     * @param assignOffsetsOnly true for this method to only set the valuePos
865     *     and valueLength fields and return a null result. This only works if
866     *     the literal is short; a string is returned otherwise.
867     */
868    private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
869        StringBuilder builder = null;
870        valuePos = -1;
871        valueLength = 0;
872        int i = 0;
873
874        findNonLiteralCharacter:
875        while (true) {
876            for (; pos + i < limit; i++) {
877                switch (buffer[pos + i]) {
878                case '/':
879                case '\\':
880                case ';':
881                case '#':
882                case '=':
883                    checkLenient(); // fall-through
884                case '{':
885                case '}':
886                case '[':
887                case ']':
888                case ':':
889                case ',':
890                case ' ':
891                case '\t':
892                case '\f':
893                case '\r':
894                case '\n':
895                    break findNonLiteralCharacter;
896                }
897            }
898
899            /*
900             * Attempt to load the entire literal into the buffer at once. If
901             * we run out of input, add a non-literal character at the end so
902             * that decoding doesn't need to do bounds checks.
903             */
904            if (i < buffer.length) {
905                if (fillBuffer(i + 1)) {
906                    continue;
907                } else {
908                    buffer[limit] = '\0';
909                    break;
910                }
911            }
912
913            // use a StringBuilder when the value is too long. It must be an unquoted string.
914            if (builder == null) {
915                builder = new StringBuilder();
916            }
917            builder.append(buffer, pos, i);
918            valueLength += i;
919            pos += i;
920            i = 0;
921            if (!fillBuffer(1)) {
922                break;
923            }
924        }
925
926        String result;
927        if (assignOffsetsOnly && builder == null) {
928            valuePos = pos;
929            result = null;
930        } else if (skipping) {
931            result = "skipped!";
932        } else if (builder == null) {
933            result = new String(buffer, pos, i);
934        } else {
935            builder.append(buffer, pos, i);
936            result = builder.toString();
937        }
938        valueLength += i;
939        pos += i;
940        return result;
941    }
942
943    @Override public String toString() {
944        return getClass().getSimpleName() + " near " + getSnippet();
945    }
946
947    /**
948     * Unescapes the character identified by the character or characters that
949     * immediately follow a backslash. The backslash '\' should have already
950     * been read. This supports both unicode escapes "u000A" and two-character
951     * escapes "\n".
952     *
953     * @throws NumberFormatException if any unicode escape sequences are
954     *     malformed.
955     */
956    private char readEscapeCharacter() throws IOException {
957        if (pos == limit && !fillBuffer(1)) {
958            throw syntaxError("Unterminated escape sequence");
959        }
960
961        char escaped = buffer[pos++];
962        switch (escaped) {
963            case 'u':
964                if (pos + 4 > limit && !fillBuffer(4)) {
965                    throw syntaxError("Unterminated escape sequence");
966                }
967                String hex = new String(buffer, pos, 4);
968                pos += 4;
969                return (char) Integer.parseInt(hex, 16);
970
971            case 't':
972                return '\t';
973
974            case 'b':
975                return '\b';
976
977            case 'n':
978                return '\n';
979
980            case 'r':
981                return '\r';
982
983            case 'f':
984                return '\f';
985
986            case '\'':
987            case '"':
988            case '\\':
989            default:
990                return escaped;
991        }
992    }
993
994    /**
995     * Reads a null, boolean, numeric or unquoted string literal value.
996     */
997    private JsonToken readLiteral() throws IOException {
998        value = nextLiteral(true);
999        if (valueLength == 0) {
1000            throw syntaxError("Expected literal value");
1001        }
1002        token = decodeLiteral();
1003        if (token == JsonToken.STRING) {
1004          checkLenient();
1005        }
1006        return token;
1007    }
1008
1009    /**
1010     * Assigns {@code nextToken} based on the value of {@code nextValue}.
1011     */
1012    private JsonToken decodeLiteral() throws IOException {
1013        if (valuePos == -1) {
1014            // it was too long to fit in the buffer so it can only be a string
1015            return JsonToken.STRING;
1016        } else if (valueLength == 4
1017                && ('n' == buffer[valuePos    ] || 'N' == buffer[valuePos    ])
1018                && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1019                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1020                && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1021            value = "null";
1022            return JsonToken.NULL;
1023        } else if (valueLength == 4
1024                && ('t' == buffer[valuePos    ] || 'T' == buffer[valuePos    ])
1025                && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1026                && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1027                && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1028            value = TRUE;
1029            return JsonToken.BOOLEAN;
1030        } else if (valueLength == 5
1031                && ('f' == buffer[valuePos    ] || 'F' == buffer[valuePos    ])
1032                && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1033                && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1034                && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1035                && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1036            value = FALSE;
1037            return JsonToken.BOOLEAN;
1038        } else {
1039            value = new String(buffer, valuePos, valueLength);
1040            return decodeNumber(buffer, valuePos, valueLength);
1041        }
1042    }
1043
1044    /**
1045     * Determine whether the characters is a JSON number. Numbers are of the
1046     * form -12.34e+56. Fractional and exponential parts are optional. Leading
1047     * zeroes are not allowed in the value or exponential part, but are allowed
1048     * in the fraction.
1049     */
1050    private JsonToken decodeNumber(char[] chars, int offset, int length) {
1051        int i = offset;
1052        int c = chars[i];
1053
1054        if (c == '-') {
1055            c = chars[++i];
1056        }
1057
1058        if (c == '0') {
1059            c = chars[++i];
1060        } else if (c >= '1' && c <= '9') {
1061            c = chars[++i];
1062            while (c >= '0' && c <= '9') {
1063                c = chars[++i];
1064            }
1065        } else {
1066            return JsonToken.STRING;
1067        }
1068
1069        if (c == '.') {
1070            c = chars[++i];
1071            while (c >= '0' && c <= '9') {
1072                c = chars[++i];
1073            }
1074        }
1075
1076        if (c == 'e' || c == 'E') {
1077            c = chars[++i];
1078            if (c == '+' || c == '-') {
1079                c = chars[++i];
1080            }
1081            if (c >= '0' && c <= '9') {
1082                c = chars[++i];
1083                while (c >= '0' && c <= '9') {
1084                    c = chars[++i];
1085                }
1086            } else {
1087                return JsonToken.STRING;
1088            }
1089        }
1090
1091        if (i == offset + length) {
1092            return JsonToken.NUMBER;
1093        } else {
1094            return JsonToken.STRING;
1095        }
1096    }
1097
1098    /**
1099     * Throws a new IO exception with the given message and a context snippet
1100     * with this reader's content.
1101     */
1102    private IOException syntaxError(String message) throws IOException {
1103        throw new MalformedJsonException(message + " near " + getSnippet());
1104    }
1105
1106    private CharSequence getSnippet() {
1107        StringBuilder snippet = new StringBuilder();
1108        int beforePos = Math.min(pos, 20);
1109        snippet.append(buffer, pos - beforePos, beforePos);
1110        int afterPos = Math.min(limit - pos, 20);
1111        snippet.append(buffer, pos, afterPos);
1112        return snippet;
1113    }
1114}
1115