1/* 2 * Copyright (C) 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "config.h" 27#include "LiteralParser.h" 28 29#include "JSArray.h" 30#include "JSString.h" 31#include "Lexer.h" 32#include "StringBuilder.h" 33#include <wtf/ASCIICType.h> 34#include <wtf/dtoa.h> 35 36namespace JSC { 37 38LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) 39{ 40 while (m_ptr < m_end && isASCIISpace(*m_ptr)) 41 ++m_ptr; 42 43 ASSERT(m_ptr <= m_end); 44 if (m_ptr >= m_end) { 45 token.type = TokEnd; 46 token.start = token.end = m_ptr; 47 return TokEnd; 48 } 49 token.type = TokError; 50 token.start = m_ptr; 51 switch (*m_ptr) { 52 case '[': 53 token.type = TokLBracket; 54 token.end = ++m_ptr; 55 return TokLBracket; 56 case ']': 57 token.type = TokRBracket; 58 token.end = ++m_ptr; 59 return TokRBracket; 60 case '(': 61 token.type = TokLParen; 62 token.end = ++m_ptr; 63 return TokLBracket; 64 case ')': 65 token.type = TokRParen; 66 token.end = ++m_ptr; 67 return TokRBracket; 68 case '{': 69 token.type = TokLBrace; 70 token.end = ++m_ptr; 71 return TokLBrace; 72 case '}': 73 token.type = TokRBrace; 74 token.end = ++m_ptr; 75 return TokRBrace; 76 case ',': 77 token.type = TokComma; 78 token.end = ++m_ptr; 79 return TokComma; 80 case ':': 81 token.type = TokColon; 82 token.end = ++m_ptr; 83 return TokColon; 84 case '"': 85 if (m_mode == StrictJSON) 86 return lexString<StrictJSON>(token); 87 return lexString<NonStrictJSON>(token); 88 case 't': 89 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { 90 m_ptr += 4; 91 token.type = TokTrue; 92 token.end = m_ptr; 93 return TokTrue; 94 } 95 break; 96 case 'f': 97 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') { 98 m_ptr += 5; 99 token.type = TokFalse; 100 token.end = m_ptr; 101 return TokFalse; 102 } 103 break; 104 case 'n': 105 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') { 106 m_ptr += 4; 107 token.type = TokNull; 108 token.end = m_ptr; 109 return TokNull; 110 } 111 break; 112 case '-': 113 case '0': 114 case '1': 115 case '2': 116 case '3': 117 case '4': 118 case '5': 119 case '6': 120 case '7': 121 case '8': 122 case '9': 123 return lexNumber(token); 124 } 125 return TokError; 126} 127 128template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c) 129{ 130 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; 131} 132 133// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. 134template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) 135{ 136 ++m_ptr; 137 const UChar* runStart; 138 StringBuilder builder; 139 do { 140 runStart = m_ptr; 141 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr)) 142 ++m_ptr; 143 if (runStart < m_ptr) 144 builder.append(runStart, m_ptr - runStart); 145 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { 146 ++m_ptr; 147 if (m_ptr >= m_end) 148 return TokError; 149 switch (*m_ptr) { 150 case '"': 151 builder.append('"'); 152 m_ptr++; 153 break; 154 case '\\': 155 builder.append('\\'); 156 m_ptr++; 157 break; 158 case '/': 159 builder.append('/'); 160 m_ptr++; 161 break; 162 case 'b': 163 builder.append('\b'); 164 m_ptr++; 165 break; 166 case 'f': 167 builder.append('\f'); 168 m_ptr++; 169 break; 170 case 'n': 171 builder.append('\n'); 172 m_ptr++; 173 break; 174 case 'r': 175 builder.append('\r'); 176 m_ptr++; 177 break; 178 case 't': 179 builder.append('\t'); 180 m_ptr++; 181 break; 182 183 case 'u': 184 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters 185 return TokError; 186 for (int i = 1; i < 5; i++) { 187 if (!isASCIIHexDigit(m_ptr[i])) 188 return TokError; 189 } 190 builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); 191 m_ptr += 5; 192 break; 193 194 default: 195 return TokError; 196 } 197 } 198 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"'); 199 200 if (m_ptr >= m_end || *m_ptr != '"') 201 return TokError; 202 203 token.stringToken = builder.build(); 204 token.type = TokString; 205 token.end = ++m_ptr; 206 return TokString; 207} 208 209LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token) 210{ 211 // ES5 and json.org define numbers as 212 // number 213 // int 214 // int frac? exp? 215 // 216 // int 217 // -? 0 218 // -? digit1-9 digits? 219 // 220 // digits 221 // digit digits? 222 // 223 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)? 224 225 if (m_ptr < m_end && *m_ptr == '-') // -? 226 ++m_ptr; 227 228 // (0 | [1-9][0-9]*) 229 if (m_ptr < m_end && *m_ptr == '0') // 0 230 ++m_ptr; 231 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9] 232 ++m_ptr; 233 // [0-9]* 234 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) 235 ++m_ptr; 236 } else 237 return TokError; 238 239 // ('.' [0-9]+)? 240 if (m_ptr < m_end && *m_ptr == '.') { 241 ++m_ptr; 242 // [0-9]+ 243 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) 244 return TokError; 245 246 ++m_ptr; 247 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) 248 ++m_ptr; 249 } 250 251 // ([eE][+-]? [0-9]+)? 252 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE] 253 ++m_ptr; 254 255 // [-+]? 256 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+')) 257 ++m_ptr; 258 259 // [0-9]+ 260 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) 261 return TokError; 262 263 ++m_ptr; 264 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) 265 ++m_ptr; 266 } 267 268 token.type = TokNumber; 269 token.end = m_ptr; 270 Vector<char, 64> buffer(token.end - token.start + 1); 271 int i; 272 for (i = 0; i < token.end - token.start; i++) { 273 ASSERT(static_cast<char>(token.start[i]) == token.start[i]); 274 buffer[i] = static_cast<char>(token.start[i]); 275 } 276 buffer[i] = 0; 277 char* end; 278 token.numberToken = WTF::strtod(buffer.data(), &end); 279 ASSERT(buffer.data() + (token.end - token.start) == end); 280 return TokNumber; 281} 282 283JSValue LiteralParser::parse(ParserState initialState) 284{ 285 ParserState state = initialState; 286 MarkedArgumentBuffer objectStack; 287 JSValue lastValue; 288 Vector<ParserState, 16> stateStack; 289 Vector<Identifier, 16> identifierStack; 290 while (1) { 291 switch(state) { 292 startParseArray: 293 case StartParseArray: { 294 JSArray* array = constructEmptyArray(m_exec); 295 objectStack.append(array); 296 // fallthrough 297 } 298 doParseArrayStartExpression: 299 case DoParseArrayStartExpression: { 300 TokenType lastToken = m_lexer.currentToken().type; 301 if (m_lexer.next() == TokRBracket) { 302 if (lastToken == TokComma) 303 return JSValue(); 304 m_lexer.next(); 305 lastValue = objectStack.last(); 306 objectStack.removeLast(); 307 break; 308 } 309 310 stateStack.append(DoParseArrayEndExpression); 311 goto startParseExpression; 312 } 313 case DoParseArrayEndExpression: { 314 asArray(objectStack.last())->push(m_exec, lastValue); 315 316 if (m_lexer.currentToken().type == TokComma) 317 goto doParseArrayStartExpression; 318 319 if (m_lexer.currentToken().type != TokRBracket) 320 return JSValue(); 321 322 m_lexer.next(); 323 lastValue = objectStack.last(); 324 objectStack.removeLast(); 325 break; 326 } 327 startParseObject: 328 case StartParseObject: { 329 JSObject* object = constructEmptyObject(m_exec); 330 objectStack.append(object); 331 332 TokenType type = m_lexer.next(); 333 if (type == TokString) { 334 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); 335 336 // Check for colon 337 if (m_lexer.next() != TokColon) 338 return JSValue(); 339 340 m_lexer.next(); 341 identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); 342 stateStack.append(DoParseObjectEndExpression); 343 goto startParseExpression; 344 } else if (type != TokRBrace) 345 return JSValue(); 346 m_lexer.next(); 347 lastValue = objectStack.last(); 348 objectStack.removeLast(); 349 break; 350 } 351 doParseObjectStartExpression: 352 case DoParseObjectStartExpression: { 353 TokenType type = m_lexer.next(); 354 if (type != TokString) 355 return JSValue(); 356 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); 357 358 // Check for colon 359 if (m_lexer.next() != TokColon) 360 return JSValue(); 361 362 m_lexer.next(); 363 identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); 364 stateStack.append(DoParseObjectEndExpression); 365 goto startParseExpression; 366 } 367 case DoParseObjectEndExpression: 368 { 369 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue); 370 identifierStack.removeLast(); 371 if (m_lexer.currentToken().type == TokComma) 372 goto doParseObjectStartExpression; 373 if (m_lexer.currentToken().type != TokRBrace) 374 return JSValue(); 375 m_lexer.next(); 376 lastValue = objectStack.last(); 377 objectStack.removeLast(); 378 break; 379 } 380 startParseExpression: 381 case StartParseExpression: { 382 switch (m_lexer.currentToken().type) { 383 case TokLBracket: 384 goto startParseArray; 385 case TokLBrace: 386 goto startParseObject; 387 case TokString: { 388 Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); 389 m_lexer.next(); 390 lastValue = jsString(m_exec, stringToken.stringToken); 391 break; 392 } 393 case TokNumber: { 394 Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); 395 m_lexer.next(); 396 lastValue = jsNumber(m_exec, numberToken.numberToken); 397 break; 398 } 399 case TokNull: 400 m_lexer.next(); 401 lastValue = jsNull(); 402 break; 403 404 case TokTrue: 405 m_lexer.next(); 406 lastValue = jsBoolean(true); 407 break; 408 409 case TokFalse: 410 m_lexer.next(); 411 lastValue = jsBoolean(false); 412 break; 413 414 default: 415 // Error 416 return JSValue(); 417 } 418 break; 419 } 420 case StartParseStatement: { 421 switch (m_lexer.currentToken().type) { 422 case TokLBracket: 423 case TokNumber: 424 case TokString: 425 goto startParseExpression; 426 427 case TokLParen: { 428 m_lexer.next(); 429 stateStack.append(StartParseStatementEndStatement); 430 goto startParseExpression; 431 } 432 default: 433 return JSValue(); 434 } 435 } 436 case StartParseStatementEndStatement: { 437 ASSERT(stateStack.isEmpty()); 438 if (m_lexer.currentToken().type != TokRParen) 439 return JSValue(); 440 if (m_lexer.next() == TokEnd) 441 return lastValue; 442 return JSValue(); 443 } 444 default: 445 ASSERT_NOT_REACHED(); 446 } 447 if (stateStack.isEmpty()) 448 return lastValue; 449 state = stateStack.last(); 450 stateStack.removeLast(); 451 continue; 452 } 453} 454 455} 456