1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "LiteralParser.h"
28
29#include "JSArray.h"
30#include "JSString.h"
31#include "Lexer.h"
32#include "StringBuilder.h"
33#include <wtf/ASCIICType.h>
34#include <wtf/dtoa.h>
35
36namespace JSC {
37
38LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
39{
40    while (m_ptr < m_end && isASCIISpace(*m_ptr))
41        ++m_ptr;
42
43    ASSERT(m_ptr <= m_end);
44    if (m_ptr >= m_end) {
45        token.type = TokEnd;
46        token.start = token.end = m_ptr;
47        return TokEnd;
48    }
49    token.type = TokError;
50    token.start = m_ptr;
51    switch (*m_ptr) {
52        case '[':
53            token.type = TokLBracket;
54            token.end = ++m_ptr;
55            return TokLBracket;
56        case ']':
57            token.type = TokRBracket;
58            token.end = ++m_ptr;
59            return TokRBracket;
60        case '(':
61            token.type = TokLParen;
62            token.end = ++m_ptr;
63            return TokLBracket;
64        case ')':
65            token.type = TokRParen;
66            token.end = ++m_ptr;
67            return TokRBracket;
68        case '{':
69            token.type = TokLBrace;
70            token.end = ++m_ptr;
71            return TokLBrace;
72        case '}':
73            token.type = TokRBrace;
74            token.end = ++m_ptr;
75            return TokRBrace;
76        case ',':
77            token.type = TokComma;
78            token.end = ++m_ptr;
79            return TokComma;
80        case ':':
81            token.type = TokColon;
82            token.end = ++m_ptr;
83            return TokColon;
84        case '"':
85            if (m_mode == StrictJSON)
86                return lexString<StrictJSON>(token);
87            return lexString<NonStrictJSON>(token);
88        case 't':
89            if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
90                m_ptr += 4;
91                token.type = TokTrue;
92                token.end = m_ptr;
93                return TokTrue;
94            }
95            break;
96        case 'f':
97            if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
98                m_ptr += 5;
99                token.type = TokFalse;
100                token.end = m_ptr;
101                return TokFalse;
102            }
103            break;
104        case 'n':
105            if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
106                m_ptr += 4;
107                token.type = TokNull;
108                token.end = m_ptr;
109                return TokNull;
110            }
111            break;
112        case '-':
113        case '0':
114        case '1':
115        case '2':
116        case '3':
117        case '4':
118        case '5':
119        case '6':
120        case '7':
121        case '8':
122        case '9':
123            return lexNumber(token);
124    }
125    return TokError;
126}
127
128template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
129{
130    return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
131}
132
133// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
134template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
135{
136    ++m_ptr;
137    const UChar* runStart;
138    StringBuilder builder;
139    do {
140        runStart = m_ptr;
141        while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
142            ++m_ptr;
143        if (runStart < m_ptr)
144            builder.append(runStart, m_ptr - runStart);
145        if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
146            ++m_ptr;
147            if (m_ptr >= m_end)
148                return TokError;
149            switch (*m_ptr) {
150                case '"':
151                    builder.append('"');
152                    m_ptr++;
153                    break;
154                case '\\':
155                    builder.append('\\');
156                    m_ptr++;
157                    break;
158                case '/':
159                    builder.append('/');
160                    m_ptr++;
161                    break;
162                case 'b':
163                    builder.append('\b');
164                    m_ptr++;
165                    break;
166                case 'f':
167                    builder.append('\f');
168                    m_ptr++;
169                    break;
170                case 'n':
171                    builder.append('\n');
172                    m_ptr++;
173                    break;
174                case 'r':
175                    builder.append('\r');
176                    m_ptr++;
177                    break;
178                case 't':
179                    builder.append('\t');
180                    m_ptr++;
181                    break;
182
183                case 'u':
184                    if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
185                        return TokError;
186                    for (int i = 1; i < 5; i++) {
187                        if (!isASCIIHexDigit(m_ptr[i]))
188                            return TokError;
189                    }
190                    builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
191                    m_ptr += 5;
192                    break;
193
194                default:
195                    return TokError;
196            }
197        }
198    } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
199
200    if (m_ptr >= m_end || *m_ptr != '"')
201        return TokError;
202
203    token.stringToken = builder.build();
204    token.type = TokString;
205    token.end = ++m_ptr;
206    return TokString;
207}
208
209LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
210{
211    // ES5 and json.org define numbers as
212    // number
213    //     int
214    //     int frac? exp?
215    //
216    // int
217    //     -? 0
218    //     -? digit1-9 digits?
219    //
220    // digits
221    //     digit digits?
222    //
223    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
224
225    if (m_ptr < m_end && *m_ptr == '-') // -?
226        ++m_ptr;
227
228    // (0 | [1-9][0-9]*)
229    if (m_ptr < m_end && *m_ptr == '0') // 0
230        ++m_ptr;
231    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
232        ++m_ptr;
233        // [0-9]*
234        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
235            ++m_ptr;
236    } else
237        return TokError;
238
239    // ('.' [0-9]+)?
240    if (m_ptr < m_end && *m_ptr == '.') {
241        ++m_ptr;
242        // [0-9]+
243        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
244            return TokError;
245
246        ++m_ptr;
247        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
248            ++m_ptr;
249    }
250
251    //  ([eE][+-]? [0-9]+)?
252    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
253        ++m_ptr;
254
255        // [-+]?
256        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
257            ++m_ptr;
258
259        // [0-9]+
260        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
261            return TokError;
262
263        ++m_ptr;
264        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
265            ++m_ptr;
266    }
267
268    token.type = TokNumber;
269    token.end = m_ptr;
270    Vector<char, 64> buffer(token.end - token.start + 1);
271    int i;
272    for (i = 0; i < token.end - token.start; i++) {
273        ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
274        buffer[i] = static_cast<char>(token.start[i]);
275    }
276    buffer[i] = 0;
277    char* end;
278    token.numberToken = WTF::strtod(buffer.data(), &end);
279    ASSERT(buffer.data() + (token.end - token.start) == end);
280    return TokNumber;
281}
282
283JSValue LiteralParser::parse(ParserState initialState)
284{
285    ParserState state = initialState;
286    MarkedArgumentBuffer objectStack;
287    JSValue lastValue;
288    Vector<ParserState, 16> stateStack;
289    Vector<Identifier, 16> identifierStack;
290    while (1) {
291        switch(state) {
292            startParseArray:
293            case StartParseArray: {
294                JSArray* array = constructEmptyArray(m_exec);
295                objectStack.append(array);
296                // fallthrough
297            }
298            doParseArrayStartExpression:
299            case DoParseArrayStartExpression: {
300                TokenType lastToken = m_lexer.currentToken().type;
301                if (m_lexer.next() == TokRBracket) {
302                    if (lastToken == TokComma)
303                        return JSValue();
304                    m_lexer.next();
305                    lastValue = objectStack.last();
306                    objectStack.removeLast();
307                    break;
308                }
309
310                stateStack.append(DoParseArrayEndExpression);
311                goto startParseExpression;
312            }
313            case DoParseArrayEndExpression: {
314                 asArray(objectStack.last())->push(m_exec, lastValue);
315
316                if (m_lexer.currentToken().type == TokComma)
317                    goto doParseArrayStartExpression;
318
319                if (m_lexer.currentToken().type != TokRBracket)
320                    return JSValue();
321
322                m_lexer.next();
323                lastValue = objectStack.last();
324                objectStack.removeLast();
325                break;
326            }
327            startParseObject:
328            case StartParseObject: {
329                JSObject* object = constructEmptyObject(m_exec);
330                objectStack.append(object);
331
332                TokenType type = m_lexer.next();
333                if (type == TokString) {
334                    Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
335
336                    // Check for colon
337                    if (m_lexer.next() != TokColon)
338                        return JSValue();
339
340                    m_lexer.next();
341                    identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
342                    stateStack.append(DoParseObjectEndExpression);
343                    goto startParseExpression;
344                } else if (type != TokRBrace)
345                    return JSValue();
346                m_lexer.next();
347                lastValue = objectStack.last();
348                objectStack.removeLast();
349                break;
350            }
351            doParseObjectStartExpression:
352            case DoParseObjectStartExpression: {
353                TokenType type = m_lexer.next();
354                if (type != TokString)
355                    return JSValue();
356                Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
357
358                // Check for colon
359                if (m_lexer.next() != TokColon)
360                    return JSValue();
361
362                m_lexer.next();
363                identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
364                stateStack.append(DoParseObjectEndExpression);
365                goto startParseExpression;
366            }
367            case DoParseObjectEndExpression:
368            {
369                asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
370                identifierStack.removeLast();
371                if (m_lexer.currentToken().type == TokComma)
372                    goto doParseObjectStartExpression;
373                if (m_lexer.currentToken().type != TokRBrace)
374                    return JSValue();
375                m_lexer.next();
376                lastValue = objectStack.last();
377                objectStack.removeLast();
378                break;
379            }
380            startParseExpression:
381            case StartParseExpression: {
382                switch (m_lexer.currentToken().type) {
383                    case TokLBracket:
384                        goto startParseArray;
385                    case TokLBrace:
386                        goto startParseObject;
387                    case TokString: {
388                        Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
389                        m_lexer.next();
390                        lastValue = jsString(m_exec, stringToken.stringToken);
391                        break;
392                    }
393                    case TokNumber: {
394                        Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
395                        m_lexer.next();
396                        lastValue = jsNumber(m_exec, numberToken.numberToken);
397                        break;
398                    }
399                    case TokNull:
400                        m_lexer.next();
401                        lastValue = jsNull();
402                        break;
403
404                    case TokTrue:
405                        m_lexer.next();
406                        lastValue = jsBoolean(true);
407                        break;
408
409                    case TokFalse:
410                        m_lexer.next();
411                        lastValue = jsBoolean(false);
412                        break;
413
414                    default:
415                        // Error
416                        return JSValue();
417                }
418                break;
419            }
420            case StartParseStatement: {
421                switch (m_lexer.currentToken().type) {
422                    case TokLBracket:
423                    case TokNumber:
424                    case TokString:
425                        goto startParseExpression;
426
427                    case TokLParen: {
428                        m_lexer.next();
429                        stateStack.append(StartParseStatementEndStatement);
430                        goto startParseExpression;
431                    }
432                    default:
433                        return JSValue();
434                }
435            }
436            case StartParseStatementEndStatement: {
437                ASSERT(stateStack.isEmpty());
438                if (m_lexer.currentToken().type != TokRParen)
439                    return JSValue();
440                if (m_lexer.next() == TokEnd)
441                    return lastValue;
442                return JSValue();
443            }
444            default:
445                ASSERT_NOT_REACHED();
446        }
447        if (stateStack.isEmpty())
448            return lastValue;
449        state = stateStack.last();
450        stateStack.removeLast();
451        continue;
452    }
453}
454
455}
456