1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.json;
18
19// Note: this class was written without inspecting the non-free org.json sourcecode.
20
21/**
22 * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
23 * encoded string into the corresponding object. Most clients of
 * this class will need only the {@link #JSONTokener(String) constructor}
25 * and {@link #nextValue} method. Example usage: <pre>
26 * String json = "{"
27 *         + "  \"query\": \"Pizza\", "
28 *         + "  \"locations\": [ 94043, 90210 ] "
29 *         + "}";
30 *
31 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue();
32 * String query = object.getString("query");
33 * JSONArray locations = object.getJSONArray("locations");</pre>
34 *
35 * <p>For best interoperability and performance use JSON that complies with
36 * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons
37 * this parser is lenient, so a successful parse does not indicate that the
38 * input string was valid JSON. All of the following syntax errors will be
39 * ignored:
40 * <ul>
41 *   <li>End of line comments starting with {@code //} or {@code #} and ending
42 *       with a newline character.
43 *   <li>C-style comments starting with {@code /*} and ending with
44 *       {@code *}{@code /}. Such comments may not be nested.
45 *   <li>Strings that are unquoted or {@code 'single quoted'}.
46 *   <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}.
47 *   <li>Octal integers prefixed with {@code 0}.
48 *   <li>Array elements separated by {@code ;}.
49 *   <li>Unnecessary array separators. These are interpreted as if null was the
50 *       omitted value.
51 *   <li>Key-value pairs separated by {@code =} or {@code =>}.
52 *   <li>Key-value pairs separated by {@code ;}.
53 * </ul>
54 *
55 * <p>Each tokener may be used to parse a single JSON string. Instances of this
56 * class are not thread safe. Although this class is nonfinal, it was not
57 * designed for inheritance and should not be subclassed. In particular,
58 * self-use by overrideable methods is not specified. See <i>Effective Java</i>
 * Item 17, "Design and document for inheritance or else prohibit it" for further
60 * information.
61 */
62public class JSONTokener {
63
64    /** The input JSON. */
65    private final String in;
66
67    /**
68     * The index of the next character to be returned by {@link #next}. When
69     * the input is exhausted, this equals the input's length.
70     */
71    private int pos;
72
73    /**
74     * @param in JSON encoded string. Null is not permitted and will yield a
75     *     tokener that throws {@code NullPointerExceptions} when methods are
76     *     called.
77     */
78    public JSONTokener(String in) {
79        // consume an optional byte order mark (BOM) if it exists
80        if (in != null && in.startsWith("\ufeff")) {
81            in = in.substring(1);
82        }
83        this.in = in;
84    }
85
86    /**
87     * Returns the next value from the input.
88     *
89     * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean,
90     *     Integer, Long, Double or {@link JSONObject#NULL}.
91     * @throws JSONException if the input is malformed.
92     */
93    public Object nextValue() throws JSONException {
94        int c = nextCleanInternal();
95        switch (c) {
96            case -1:
97                throw syntaxError("End of input");
98
99            case '{':
100                return readObject();
101
102            case '[':
103                return readArray();
104
105            case '\'':
106            case '"':
107                return nextString((char) c);
108
109            default:
110                pos--;
111                return readLiteral();
112        }
113    }
114
115    private int nextCleanInternal() throws JSONException {
116        while (pos < in.length()) {
117            int c = in.charAt(pos++);
118            switch (c) {
119                case '\t':
120                case ' ':
121                case '\n':
122                case '\r':
123                    continue;
124
125                case '/':
126                    if (pos == in.length()) {
127                        return c;
128                    }
129
130                    char peek = in.charAt(pos);
131                    switch (peek) {
132                        case '*':
133                            // skip a /* c-style comment */
134                            pos++;
135                            int commentEnd = in.indexOf("*/", pos);
136                            if (commentEnd == -1) {
137                                throw syntaxError("Unterminated comment");
138                            }
139                            pos = commentEnd + 2;
140                            continue;
141
142                        case '/':
143                            // skip a // end-of-line comment
144                            pos++;
145                            skipToEndOfLine();
146                            continue;
147
148                        default:
149                            return c;
150                    }
151
152                case '#':
153                    /*
154                     * Skip a # hash end-of-line comment. The JSON RFC doesn't
155                     * specify this behavior, but it's required to parse
156                     * existing documents. See http://b/2571423.
157                     */
158                    skipToEndOfLine();
159                    continue;
160
161                default:
162                    return c;
163            }
164        }
165
166        return -1;
167    }
168
169    /**
170     * Advances the position until after the next newline character. If the line
171     * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
172     * caller.
173     */
174    private void skipToEndOfLine() {
175        for (; pos < in.length(); pos++) {
176            char c = in.charAt(pos);
177            if (c == '\r' || c == '\n') {
178                pos++;
179                break;
180            }
181        }
182    }
183
184    /**
185     * Returns the string up to but not including {@code quote}, unescaping any
186     * character escape sequences encountered along the way. The opening quote
187     * should have already been read. This consumes the closing quote, but does
188     * not include it in the returned string.
189     *
190     * @param quote either ' or ".
191     */
192    public String nextString(char quote) throws JSONException {
193        /*
194         * For strings that are free of escape sequences, we can just extract
195         * the result as a substring of the input. But if we encounter an escape
196         * sequence, we need to use a StringBuilder to compose the result.
197         */
198        StringBuilder builder = null;
199
200        /* the index of the first character not yet appended to the builder. */
201        int start = pos;
202
203        while (pos < in.length()) {
204            int c = in.charAt(pos++);
205            if (c == quote) {
206                if (builder == null) {
207                    // a new string avoids leaking memory
208                    return new String(in.substring(start, pos - 1));
209                } else {
210                    builder.append(in, start, pos - 1);
211                    return builder.toString();
212                }
213            }
214
215            if (c == '\\') {
216                if (pos == in.length()) {
217                    throw syntaxError("Unterminated escape sequence");
218                }
219                if (builder == null) {
220                    builder = new StringBuilder();
221                }
222                builder.append(in, start, pos - 1);
223                builder.append(readEscapeCharacter());
224                start = pos;
225            }
226        }
227
228        throw syntaxError("Unterminated string");
229    }
230
231    /**
232     * Unescapes the character identified by the character or characters that
233     * immediately follow a backslash. The backslash '\' should have already
234     * been read. This supports both unicode escapes "u000A" and two-character
235     * escapes "\n".
236     */
237    private char readEscapeCharacter() throws JSONException {
238        char escaped = in.charAt(pos++);
239        switch (escaped) {
240            case 'u':
241                if (pos + 4 > in.length()) {
242                    throw syntaxError("Unterminated escape sequence");
243                }
244                String hex = in.substring(pos, pos + 4);
245                pos += 4;
246                try {
247                    return (char) Integer.parseInt(hex, 16);
248                } catch (NumberFormatException nfe) {
249                    throw syntaxError("Invalid escape sequence: " + hex);
250                }
251
252            case 't':
253                return '\t';
254
255            case 'b':
256                return '\b';
257
258            case 'n':
259                return '\n';
260
261            case 'r':
262                return '\r';
263
264            case 'f':
265                return '\f';
266
267            case '\'':
268            case '"':
269            case '\\':
270            default:
271                return escaped;
272        }
273    }
274
275    /**
276     * Reads a null, boolean, numeric or unquoted string literal value. Numeric
277     * values will be returned as an Integer, Long, or Double, in that order of
278     * preference.
279     */
280    private Object readLiteral() throws JSONException {
281        String literal = nextToInternal("{}[]/\\:,=;# \t\f");
282
283        if (literal.length() == 0) {
284            throw syntaxError("Expected literal value");
285        } else if ("null".equalsIgnoreCase(literal)) {
286            return JSONObject.NULL;
287        } else if ("true".equalsIgnoreCase(literal)) {
288            return Boolean.TRUE;
289        } else if ("false".equalsIgnoreCase(literal)) {
290            return Boolean.FALSE;
291        }
292
293        /* try to parse as an integral type... */
294        if (literal.indexOf('.') == -1) {
295            int base = 10;
296            String number = literal;
297            if (number.startsWith("0x") || number.startsWith("0X")) {
298                number = number.substring(2);
299                base = 16;
300            } else if (number.startsWith("0") && number.length() > 1) {
301                number = number.substring(1);
302                base = 8;
303            }
304            try {
305                long longValue = Long.parseLong(number, base);
306                if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) {
307                    return (int) longValue;
308                } else {
309                    return longValue;
310                }
311            } catch (NumberFormatException e) {
312                /*
313                 * This only happens for integral numbers greater than
314                 * Long.MAX_VALUE, numbers in exponential form (5e-10) and
315                 * unquoted strings. Fall through to try floating point.
316                 */
317            }
318        }
319
320        /* ...next try to parse as a floating point... */
321        try {
322            return Double.valueOf(literal);
323        } catch (NumberFormatException ignored) {
324        }
325
326        /* ... finally give up. We have an unquoted string */
327        return new String(literal); // a new string avoids leaking memory
328    }
329
330    /**
331     * Returns the string up to but not including any of the given characters or
332     * a newline character. This does not consume the excluded character.
333     */
334    private String nextToInternal(String excluded) {
335        int start = pos;
336        for (; pos < in.length(); pos++) {
337            char c = in.charAt(pos);
338            if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) {
339                return in.substring(start, pos);
340            }
341        }
342        return in.substring(start);
343    }
344
345    /**
346     * Reads a sequence of key/value pairs and the trailing closing brace '}' of
347     * an object. The opening brace '{' should have already been read.
348     */
349    private JSONObject readObject() throws JSONException {
350        JSONObject result = new JSONObject();
351
352        /* Peek to see if this is the empty object. */
353        int first = nextCleanInternal();
354        if (first == '}') {
355            return result;
356        } else if (first != -1) {
357            pos--;
358        }
359
360        while (true) {
361            Object name = nextValue();
362            if (!(name instanceof String)) {
363                if (name == null) {
364                    throw syntaxError("Names cannot be null");
365                } else {
366                    throw syntaxError("Names must be strings, but " + name
367                            + " is of type " + name.getClass().getName());
368                }
369            }
370
371            /*
372             * Expect the name/value separator to be either a colon ':', an
373             * equals sign '=', or an arrow "=>". The last two are bogus but we
374             * include them because that's what the original implementation did.
375             */
376            int separator = nextCleanInternal();
377            if (separator != ':' && separator != '=') {
378                throw syntaxError("Expected ':' after " + name);
379            }
380            if (pos < in.length() && in.charAt(pos) == '>') {
381                pos++;
382            }
383
384            result.put((String) name, nextValue());
385
386            switch (nextCleanInternal()) {
387                case '}':
388                    return result;
389                case ';':
390                case ',':
391                    continue;
392                default:
393                    throw syntaxError("Unterminated object");
394            }
395        }
396    }
397
398    /**
399     * Reads a sequence of values and the trailing closing brace ']' of an
400     * array. The opening brace '[' should have already been read. Note that
401     * "[]" yields an empty array, but "[,]" returns a two-element array
402     * equivalent to "[null,null]".
403     */
404    private JSONArray readArray() throws JSONException {
405        JSONArray result = new JSONArray();
406
407        /* to cover input that ends with ",]". */
408        boolean hasTrailingSeparator = false;
409
410        while (true) {
411            switch (nextCleanInternal()) {
412                case -1:
413                    throw syntaxError("Unterminated array");
414                case ']':
415                    if (hasTrailingSeparator) {
416                        result.put(null);
417                    }
418                    return result;
419                case ',':
420                case ';':
421                    /* A separator without a value first means "null". */
422                    result.put(null);
423                    hasTrailingSeparator = true;
424                    continue;
425                default:
426                    pos--;
427            }
428
429            result.put(nextValue());
430
431            switch (nextCleanInternal()) {
432                case ']':
433                    return result;
434                case ',':
435                case ';':
436                    hasTrailingSeparator = true;
437                    continue;
438                default:
439                    throw syntaxError("Unterminated array");
440            }
441        }
442    }
443
444    /**
445     * Returns an exception containing the given message plus the current
446     * position and the entire input string.
447     */
448    public JSONException syntaxError(String message) {
449        return new JSONException(message + this);
450    }
451
452    /**
453     * Returns the current position and the entire input string.
454     */
455    @Override public String toString() {
456        // consistent with the original implementation
457        return " at character " + pos + " of " + in;
458    }
459
460    /*
461     * Legacy APIs.
462     *
463     * None of the methods below are on the critical path of parsing JSON
464     * documents. They exist only because they were exposed by the original
465     * implementation and may be used by some clients.
466     */
467
468    /**
469     * Returns true until the input has been exhausted.
470     */
471    public boolean more() {
472        return pos < in.length();
473    }
474
475    /**
476     * Returns the next available character, or the null character '\0' if all
477     * input has been exhausted. The return value of this method is ambiguous
478     * for JSON strings that contain the character '\0'.
479     */
480    public char next() {
481        return pos < in.length() ? in.charAt(pos++) : '\0';
482    }
483
484    /**
485     * Returns the next available character if it equals {@code c}. Otherwise an
486     * exception is thrown.
487     */
488    public char next(char c) throws JSONException {
489        char result = next();
490        if (result != c) {
491            throw syntaxError("Expected " + c + " but was " + result);
492        }
493        return result;
494    }
495
496    /**
497     * Returns the next character that is not whitespace and does not belong to
498     * a comment. If the input is exhausted before such a character can be
499     * found, the null character '\0' is returned. The return value of this
500     * method is ambiguous for JSON strings that contain the character '\0'.
501     */
502    public char nextClean() throws JSONException {
503        int nextCleanInt = nextCleanInternal();
504        return nextCleanInt == -1 ? '\0' : (char) nextCleanInt;
505    }
506
507    /**
508     * Returns the next {@code length} characters of the input.
509     *
510     * <p>The returned string shares its backing character array with this
511     * tokener's input string. If a reference to the returned string may be held
512     * indefinitely, you should use {@code new String(result)} to copy it first
513     * to avoid memory leaks.
514     *
515     * @throws JSONException if the remaining input is not long enough to
516     *     satisfy this request.
517     */
518    public String next(int length) throws JSONException {
519        if (pos + length > in.length()) {
520            throw syntaxError(length + " is out of bounds");
521        }
522        String result = in.substring(pos, pos + length);
523        pos += length;
524        return result;
525    }
526
527    /**
528     * Returns the {@link String#trim trimmed} string holding the characters up
529     * to but not including the first of:
530     * <ul>
531     *   <li>any character in {@code excluded}
532     *   <li>a newline character '\n'
533     *   <li>a carriage return '\r'
534     * </ul>
535     *
536     * <p>The returned string shares its backing character array with this
537     * tokener's input string. If a reference to the returned string may be held
538     * indefinitely, you should use {@code new String(result)} to copy it first
539     * to avoid memory leaks.
540     *
541     * @return a possibly-empty string
542     */
543    public String nextTo(String excluded) {
544        if (excluded == null) {
545            throw new NullPointerException("excluded == null");
546        }
547        return nextToInternal(excluded).trim();
548    }
549
550    /**
551     * Equivalent to {@code nextTo(String.valueOf(excluded))}.
552     */
553    public String nextTo(char excluded) {
554        return nextToInternal(String.valueOf(excluded)).trim();
555    }
556
557    /**
558     * Advances past all input up to and including the next occurrence of
559     * {@code thru}. If the remaining input doesn't contain {@code thru}, the
560     * input is exhausted.
561     */
562    public void skipPast(String thru) {
563        int thruStart = in.indexOf(thru, pos);
564        pos = thruStart == -1 ? in.length() : (thruStart + thru.length());
565    }
566
567    /**
568     * Advances past all input up to but not including the next occurrence of
569     * {@code to}. If the remaining input doesn't contain {@code to}, the input
570     * is unchanged.
571     */
572    public char skipTo(char to) {
573        int index = in.indexOf(to, pos);
574        if (index != -1) {
575            pos = index;
576            return to;
577        } else {
578            return '\0';
579        }
580    }
581
582    /**
583     * Unreads the most recent character of input. If no input characters have
584     * been read, the input is unchanged.
585     */
586    public void back() {
587        if (--pos == -1) {
588            pos = 0;
589        }
590    }
591
592    /**
593     * Returns the integer [0..15] value for the given hex character, or -1
594     * for non-hex input.
595     *
596     * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other
597     *     character will yield a -1 result.
598     */
599    public static int dehexchar(char hex) {
600        if (hex >= '0' && hex <= '9') {
601            return hex - '0';
602        } else if (hex >= 'A' && hex <= 'F') {
603            return hex - 'A' + 10;
604        } else if (hex >= 'a' && hex <= 'f') {
605            return hex - 'a' + 10;
606        } else {
607            return -1;
608        }
609    }
610}
611