1/*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "core/inspector/JSONParser.h"
33
34#include "platform/JSONValues.h"
35#include "wtf/text/StringBuilder.h"
36
37namespace blink {
38
39namespace {
40
// Deepest level of array/object nesting that buildValue() will descend into.
// A value nested more deeply than this makes the whole parse fail, which
// bounds the recursion depth of the parser.
const int stackLimit = 1000;
42
// Kinds of lexical token produced by parseToken().
enum Token {
    ObjectBegin, // '{'
    ObjectEnd, // '}'
    ArrayBegin, // '['
    ArrayEnd, // ']'
    StringLiteral, // A double-quoted string, quotes included in the token.
    Number, // A numeric literal.
    BoolTrue, // The literal "true".
    BoolFalse, // The literal "false".
    NullToken, // The literal "null".
    ListSeparator, // ','
    ObjectPairSeparator, // ':'
    InvalidToken, // End of input, or anything that is not a valid token.
};
57
// Spellings of the three keyword literals recognised by the tokenizer. They
// are matched character by character against the input by parseConstToken().
const char* const nullString = "null";
const char* const trueString = "true";
const char* const falseString = "false";
61
// Matches the NUL-terminated literal |token| (e.g. "null") against the input
// beginning at |start|.
//
// Returns true, and stores the position just past the literal in |*tokenEnd|,
// only when every character of |token| is present at the front of
// [start, end). On failure |*tokenEnd| is left untouched.
template<typename CharType>
bool parseConstToken(const CharType* start, const CharType* end, const CharType** tokenEnd, const char* token)
{
    // Advance only past characters that really match. Comparing with
    // post-increments inside the loop condition would also step over a
    // mismatching *final* character, leaving |token| at its terminator and
    // thereby accepting input such as "nulx" as "null".
    while (start < end && *token != '\0' && *start == *token) {
        ++start;
        ++token;
    }
    if (*token != '\0')
        return false;
    *tokenEnd = start;
    return true;
}
71
// Consumes a run of one or more ASCII decimal digits at the front of
// [start, end).
//
// Fails when there is no digit at |start|, or when |canHaveLeadingZeros| is
// false and a run of two or more digits begins with '0'. On success
// |*tokenEnd| receives the position just past the last digit; on failure it
// is left untouched.
template<typename CharType>
bool readInt(const CharType* start, const CharType* end, const CharType** tokenEnd, bool canHaveLeadingZeros)
{
    if (start == end)
        return false;

    const bool startsWithZero = *start == '0';

    const CharType* cursor = start;
    while (cursor < end && *cursor >= '0' && *cursor <= '9')
        ++cursor;

    // No digits at all.
    if (cursor == start)
        return false;

    // "0" alone is always fine; "01" is only fine when explicitly permitted.
    const bool hasSeveralDigits = cursor - start > 1;
    if (startsWithZero && hasSeveralDigits && !canHaveLeadingZeros)
        return false;

    *tokenEnd = cursor;
    return true;
}
90
// Scans a numeric literal of the form [minus] int [frac] [exp] (RFC 4627) at
// the front of [start, end). Only the extent of the literal is determined
// here; no numeric conversion is performed.
//
// On success |*tokenEnd| receives the position just past the literal. On
// failure |*tokenEnd| is left untouched.
template<typename CharType>
bool parseNumberToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    if (start == end)
        return false;

    const CharType* cursor = start;

    // [minus]
    if (*cursor == '-')
        ++cursor;

    // int: mandatory, and a multi-digit integer part may not start with '0'.
    if (!readInt(cursor, end, &cursor, false))
        return false;

    // [frac]: a '.' must be followed by at least one digit.
    if (cursor != end && *cursor == '.') {
        ++cursor;
        if (!readInt(cursor, end, &cursor, true))
            return false;
    }

    // [exp]: 'e' or 'E', an optional sign, then at least one digit. readInt()
    // rejects an empty range, which covers input that ends right after the
    // exponent marker or its sign.
    if (cursor != end && (*cursor == 'e' || *cursor == 'E')) {
        ++cursor;
        if (cursor != end && (*cursor == '-' || *cursor == '+'))
            ++cursor;
        if (!readInt(cursor, end, &cursor, true))
            return false;
    }

    *tokenEnd = cursor;
    return true;
}
140
// Requires exactly |digits| hexadecimal digits (0-9, a-f, A-F) at the front
// of [start, end).
//
// Returns false if fewer than |digits| characters are available or if any of
// them is not a hex digit. On success |*tokenEnd| receives the position just
// past the last digit; on failure it is left untouched.
template<typename CharType>
bool readHexDigits(const CharType* start, const CharType* end, const CharType** tokenEnd, int digits)
{
    if (end - start < digits)
        return false;

    const CharType* cursor = start;
    for (int remaining = digits; remaining > 0; --remaining, ++cursor) {
        const CharType ch = *cursor;
        const bool isDecimal = ch >= '0' && ch <= '9';
        const bool isLowerHex = ch >= 'a' && ch <= 'f';
        const bool isUpperHex = ch >= 'A' && ch <= 'F';
        if (!isDecimal && !isLowerHex && !isUpperHex)
            return false;
    }

    *tokenEnd = cursor;
    return true;
}
154
// Scans the remainder of a string literal. |start| is expected to point just
// past the opening double quote; scanning stops at the first unescaped
// closing double quote.
//
// Every backslash escape is validated: \x must be followed by two hex digits,
// \u by four, and otherwise only \\ \/ \b \f \n \r \t \v \" are accepted.
//
// Returns true and sets |*tokenEnd| just past the closing quote on success.
// Returns false, leaving |*tokenEnd| untouched, on an invalid escape or when
// the input ends before the closing quote is found.
template<typename CharType>
bool parseStringToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    while (start < end) {
        CharType c = *start++;
        if ('\\' == c) {
            // A backslash needs at least one more character after it. Without
            // this check, input ending right after a backslash would have the
            // escape character read from beyond |end|.
            if (start == end)
                return false;
            c = *start++;
            // Make sure the escaped char is valid.
            switch (c) {
            case 'x':
                if (!readHexDigits(start, end, &start, 2))
                    return false;
                break;
            case 'u':
                if (!readHexDigits(start, end, &start, 4))
                    return false;
                break;
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case '"':
                break;
            default:
                return false;
            }
        } else if ('"' == c) {
            *tokenEnd = start;
            return true;
        }
    }
    // Ran out of input without seeing the closing quote.
    return false;
}
192
193template<typename CharType>
194Token parseToken(const CharType* start, const CharType* end, const CharType** tokenStart, const CharType** tokenEnd)
195{
196    while (start < end && isSpaceOrNewline(*start))
197        ++start;
198
199    if (start == end)
200        return InvalidToken;
201
202    *tokenStart = start;
203
204    switch (*start) {
205    case 'n':
206        if (parseConstToken(start, end, tokenEnd, nullString))
207            return NullToken;
208        break;
209    case 't':
210        if (parseConstToken(start, end, tokenEnd, trueString))
211            return BoolTrue;
212        break;
213    case 'f':
214        if (parseConstToken(start, end, tokenEnd, falseString))
215            return BoolFalse;
216        break;
217    case '[':
218        *tokenEnd = start + 1;
219        return ArrayBegin;
220    case ']':
221        *tokenEnd = start + 1;
222        return ArrayEnd;
223    case ',':
224        *tokenEnd = start + 1;
225        return ListSeparator;
226    case '{':
227        *tokenEnd = start + 1;
228        return ObjectBegin;
229    case '}':
230        *tokenEnd = start + 1;
231        return ObjectEnd;
232    case ':':
233        *tokenEnd = start + 1;
234        return ObjectPairSeparator;
235    case '0':
236    case '1':
237    case '2':
238    case '3':
239    case '4':
240    case '5':
241    case '6':
242    case '7':
243    case '8':
244    case '9':
245    case '-':
246        if (parseNumberToken(start, end, tokenEnd))
247            return Number;
248        break;
249    case '"':
250        if (parseStringToken(start + 1, end, tokenEnd))
251            return StringLiteral;
252        break;
253    }
254    return InvalidToken;
255}
256
257template<typename CharType>
258inline int hexToInt(CharType c)
259{
260    if ('0' <= c && c <= '9')
261        return c - '0';
262    if ('A' <= c && c <= 'F')
263        return c - 'A' + 10;
264    if ('a' <= c && c <= 'f')
265        return c - 'a' + 10;
266    ASSERT_NOT_REACHED();
267    return 0;
268}
269
270template<typename CharType>
271bool decodeString(const CharType* start, const CharType* end, StringBuilder* output)
272{
273    while (start < end) {
274        UChar c = *start++;
275        if ('\\' != c) {
276            output->append(c);
277            continue;
278        }
279        c = *start++;
280        switch (c) {
281        case '"':
282        case '/':
283        case '\\':
284            break;
285        case 'b':
286            c = '\b';
287            break;
288        case 'f':
289            c = '\f';
290            break;
291        case 'n':
292            c = '\n';
293            break;
294        case 'r':
295            c = '\r';
296            break;
297        case 't':
298            c = '\t';
299            break;
300        case 'v':
301            c = '\v';
302            break;
303        case 'x':
304            c = (hexToInt(*start) << 4) +
305                hexToInt(*(start + 1));
306            start += 2;
307            break;
308        case 'u':
309            c = (hexToInt(*start) << 12) +
310                (hexToInt(*(start + 1)) << 8) +
311                (hexToInt(*(start + 2)) << 4) +
312                hexToInt(*(start + 3));
313            start += 4;
314            break;
315        default:
316            return false;
317        }
318        output->append(c);
319    }
320    return true;
321}
322
323template<typename CharType>
324bool decodeString(const CharType* start, const CharType* end, String* output)
325{
326    if (start == end) {
327        *output = "";
328        return true;
329    }
330    if (start > end)
331        return false;
332    StringBuilder buffer;
333    buffer.reserveCapacity(end - start);
334    if (!decodeString(start, end, &buffer))
335        return false;
336    *output = buffer.toString();
337    return true;
338}
339
340template<typename CharType>
341PassRefPtr<JSONValue> buildValue(const CharType* start, const CharType* end, const CharType** valueTokenEnd, int depth)
342{
343    if (depth > stackLimit)
344        return nullptr;
345
346    RefPtr<JSONValue> result;
347    const CharType* tokenStart;
348    const CharType* tokenEnd;
349    Token token = parseToken(start, end, &tokenStart, &tokenEnd);
350    switch (token) {
351    case InvalidToken:
352        return nullptr;
353    case NullToken:
354        result = JSONValue::null();
355        break;
356    case BoolTrue:
357        result = JSONBasicValue::create(true);
358        break;
359    case BoolFalse:
360        result = JSONBasicValue::create(false);
361        break;
362    case Number: {
363        bool ok;
364        double value = charactersToDouble(tokenStart, tokenEnd - tokenStart, &ok);
365        if (!ok)
366            return nullptr;
367        result = JSONBasicValue::create(value);
368        break;
369    }
370    case StringLiteral: {
371        String value;
372        bool ok = decodeString(tokenStart + 1, tokenEnd - 1, &value);
373        if (!ok)
374            return nullptr;
375        result = JSONString::create(value);
376        break;
377    }
378    case ArrayBegin: {
379        RefPtr<JSONArray> array = JSONArray::create();
380        start = tokenEnd;
381        token = parseToken(start, end, &tokenStart, &tokenEnd);
382        while (token != ArrayEnd) {
383            RefPtr<JSONValue> arrayNode = buildValue(start, end, &tokenEnd, depth + 1);
384            if (!arrayNode)
385                return nullptr;
386            array->pushValue(arrayNode);
387
388            // After a list value, we expect a comma or the end of the list.
389            start = tokenEnd;
390            token = parseToken(start, end, &tokenStart, &tokenEnd);
391            if (token == ListSeparator) {
392                start = tokenEnd;
393                token = parseToken(start, end, &tokenStart, &tokenEnd);
394                if (token == ArrayEnd)
395                    return nullptr;
396            } else if (token != ArrayEnd) {
397                // Unexpected value after list value. Bail out.
398                return nullptr;
399            }
400        }
401        if (token != ArrayEnd)
402            return nullptr;
403        result = array.release();
404        break;
405    }
406    case ObjectBegin: {
407        RefPtr<JSONObject> object = JSONObject::create();
408        start = tokenEnd;
409        token = parseToken(start, end, &tokenStart, &tokenEnd);
410        while (token != ObjectEnd) {
411            if (token != StringLiteral)
412                return nullptr;
413            String key;
414            if (!decodeString(tokenStart + 1, tokenEnd - 1, &key))
415                return nullptr;
416            start = tokenEnd;
417
418            token = parseToken(start, end, &tokenStart, &tokenEnd);
419            if (token != ObjectPairSeparator)
420                return nullptr;
421            start = tokenEnd;
422
423            RefPtr<JSONValue> value = buildValue(start, end, &tokenEnd, depth + 1);
424            if (!value)
425                return nullptr;
426            object->setValue(key, value);
427            start = tokenEnd;
428
429            // After a key/value pair, we expect a comma or the end of the
430            // object.
431            token = parseToken(start, end, &tokenStart, &tokenEnd);
432            if (token == ListSeparator) {
433                start = tokenEnd;
434                token = parseToken(start, end, &tokenStart, &tokenEnd);
435                if (token == ObjectEnd)
436                    return nullptr;
437            } else if (token != ObjectEnd) {
438                // Unexpected value after last object value. Bail out.
439                return nullptr;
440            }
441        }
442        if (token != ObjectEnd)
443            return nullptr;
444        result = object.release();
445        break;
446    }
447
448    default:
449        // We got a token that's not a value.
450        return nullptr;
451    }
452    *valueTokenEnd = tokenEnd;
453    return result.release();
454}
455
456template<typename CharType>
457PassRefPtr<JSONValue> parseJSONInternal(const CharType* start, unsigned length)
458{
459    const CharType* end = start + length;
460    const CharType *tokenEnd;
461    RefPtr<JSONValue> value = buildValue(start, end, &tokenEnd, 0);
462    if (!value || tokenEnd != end)
463        return nullptr;
464    return value.release();
465}
466
467} // anonymous namespace
468
469PassRefPtr<JSONValue> parseJSON(const String& json)
470{
471    if (json.isEmpty())
472        return nullptr;
473    if (json.is8Bit())
474        return parseJSONInternal(json.characters8(), json.length());
475    return parseJSONInternal(json.characters16(), json.length());
476}
477
478} // namespace blink
479