json-parser.h revision 69a99ed0b2b2ef69d393c371b03db3a98aaf880e
1// Copyright 2011 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28#ifndef V8_JSON_PARSER_H_ 29#define V8_JSON_PARSER_H_ 30 31#include "v8.h" 32 33#include "char-predicates-inl.h" 34#include "v8conversions.h" 35#include "messages.h" 36#include "spaces-inl.h" 37#include "token.h" 38 39namespace v8 { 40namespace internal { 41 42// A simple json parser. 43template <bool seq_ascii> 44class JsonParser BASE_EMBEDDED { 45 public: 46 static Handle<Object> Parse(Handle<String> source) { 47 return JsonParser().ParseJson(source); 48 } 49 50 static const int kEndOfString = -1; 51 52 private: 53 // Parse a string containing a single JSON value. 54 Handle<Object> ParseJson(Handle<String> source); 55 56 inline void Advance() { 57 position_++; 58 if (position_ >= source_length_) { 59 c0_ = kEndOfString; 60 } else if (seq_ascii) { 61 c0_ = seq_source_->SeqAsciiStringGet(position_); 62 } else { 63 c0_ = source_->Get(position_); 64 } 65 } 66 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, 68 // section 15.12.1.1. The only allowed whitespace characters between tokens 69 // are tab, carriage-return, newline and space. 70 71 inline void AdvanceSkipWhitespace() { 72 do { 73 Advance(); 74 } while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' '); 75 } 76 77 inline void SkipWhitespace() { 78 while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ') { 79 Advance(); 80 } 81 } 82 83 inline uc32 AdvanceGetChar() { 84 Advance(); 85 return c0_; 86 } 87 88 // Checks that current charater is c. 89 // If so, then consume c and skip whitespace. 90 inline bool MatchSkipWhiteSpace(uc32 c) { 91 if (c0_ == c) { 92 AdvanceSkipWhitespace(); 93 return true; 94 } 95 return false; 96 } 97 98 // A JSON string (production JSONString) is subset of valid JavaScript string 99 // literals. The string must only be double-quoted (not single-quoted), and 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 102 Handle<String> ParseJsonString() { 103 return ScanJsonString<false>(); 104 } 105 Handle<String> ParseJsonSymbol() { 106 return ScanJsonString<true>(); 107 } 108 template <bool is_symbol> 109 Handle<String> ScanJsonString(); 110 // Creates a new string and copies prefix[start..end] into the beginning 111 // of it. Then scans the rest of the string, adding characters after the 112 // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char. 113 template <typename StringType, typename SinkChar> 114 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); 115 116 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 117 // decimal number literals. 118 // It includes an optional minus sign, must have at least one 119 // digit before and after a decimal point, may not have prefixed zeros (unless 120 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 121 // Hexadecimal and octal numbers are not allowed. 122 Handle<Object> ParseJsonNumber(); 123 124 // Parse a single JSON value from input (grammar production JSONValue). 125 // A JSON value is either a (double-quoted) string literal, a number literal, 126 // one of "true", "false", or "null", or an object or array literal. 127 Handle<Object> ParseJsonValue(); 128 129 // Parse a JSON object literal (grammar production JSONObject). 130 // An object literal is a squiggly-braced and comma separated sequence 131 // (possibly empty) of key/value pairs, where the key is a JSON string 132 // literal, the value is a JSON value, and the two are separated by a colon. 133 // A JSON array dosn't allow numbers and identifiers as keys, like a 134 // JavaScript array. 135 Handle<Object> ParseJsonObject(); 136 137 // Parses a JSON array literal (grammar production JSONArray). An array 138 // literal is a square-bracketed and comma separated sequence (possibly empty) 139 // of JSON values. 140 // A JSON array doesn't allow leaving out values from the sequence, nor does 141 // it allow a terminal comma, like a JavaScript array does. 142 Handle<Object> ParseJsonArray(); 143 144 145 // Mark that a parsing error has happened at the current token, and 146 // return a null handle. Primarily for readability. 147 inline Handle<Object> ReportUnexpectedCharacter() { 148 return Handle<Object>::null(); 149 } 150 151 inline Isolate* isolate() { return isolate_; } 152 153 static const int kInitialSpecialStringLength = 1024; 154 155 156 private: 157 Handle<String> source_; 158 int source_length_; 159 Handle<SeqAsciiString> seq_source_; 160 161 Isolate* isolate_; 162 uc32 c0_; 163 int position_; 164}; 165 166template <bool seq_ascii> 167Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { 168 isolate_ = source->map()->isolate(); 169 FlattenString(source); 170 source_ = source; 171 source_length_ = source_->length(); 172 173 // Optimized fast case where we only have ASCII characters. 174 if (seq_ascii) { 175 seq_source_ = Handle<SeqAsciiString>::cast(source_); 176 } 177 178 // Set initial position right before the string. 179 position_ = -1; 180 // Advance to the first character (posibly EOS) 181 AdvanceSkipWhitespace(); 182 Handle<Object> result = ParseJsonValue(); 183 if (result.is_null() || c0_ != kEndOfString) { 184 // Parse failed. Current character is the unexpected token. 185 186 const char* message; 187 Factory* factory = isolate()->factory(); 188 Handle<JSArray> array; 189 190 switch (c0_) { 191 case kEndOfString: 192 message = "unexpected_eos"; 193 array = factory->NewJSArray(0); 194 break; 195 case '-': 196 case '0': 197 case '1': 198 case '2': 199 case '3': 200 case '4': 201 case '5': 202 case '6': 203 case '7': 204 case '8': 205 case '9': 206 message = "unexpected_token_number"; 207 array = factory->NewJSArray(0); 208 break; 209 case '"': 210 message = "unexpected_token_string"; 211 array = factory->NewJSArray(0); 212 break; 213 default: 214 message = "unexpected_token"; 215 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_); 216 Handle<FixedArray> element = factory->NewFixedArray(1); 217 element->set(0, *name); 218 array = factory->NewJSArrayWithElements(element); 219 break; 220 } 221 222 MessageLocation location(factory->NewScript(source), 223 position_, 224 position_ + 1); 225 Handle<Object> result = factory->NewSyntaxError(message, array); 226 isolate()->Throw(*result, &location); 227 return Handle<Object>::null(); 228 } 229 return result; 230} 231 232 233// Parse any JSON value. 234template <bool seq_ascii> 235Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() { 236 switch (c0_) { 237 case '"': 238 return ParseJsonString(); 239 case '-': 240 case '0': 241 case '1': 242 case '2': 243 case '3': 244 case '4': 245 case '5': 246 case '6': 247 case '7': 248 case '8': 249 case '9': 250 return ParseJsonNumber(); 251 case 'f': 252 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && 253 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { 254 AdvanceSkipWhitespace(); 255 return isolate()->factory()->false_value(); 256 } else { 257 return ReportUnexpectedCharacter(); 258 } 259 case 't': 260 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && 261 AdvanceGetChar() == 'e') { 262 AdvanceSkipWhitespace(); 263 return isolate()->factory()->true_value(); 264 } else { 265 return ReportUnexpectedCharacter(); 266 } 267 case 'n': 268 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && 269 AdvanceGetChar() == 'l') { 270 AdvanceSkipWhitespace(); 271 return isolate()->factory()->null_value(); 272 } else { 273 return ReportUnexpectedCharacter(); 274 } 275 case '{': 276 return ParseJsonObject(); 277 case '[': 278 return ParseJsonArray(); 279 default: 280 return ReportUnexpectedCharacter(); 281 } 282} 283 284 285// Parse a JSON object. Position must be right at '{'. 286template <bool seq_ascii> 287Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() { 288 Handle<JSFunction> object_constructor( 289 isolate()->global_context()->object_function()); 290 Handle<JSObject> json_object = 291 isolate()->factory()->NewJSObject(object_constructor); 292 ASSERT_EQ(c0_, '{'); 293 294 AdvanceSkipWhitespace(); 295 if (c0_ != '}') { 296 do { 297 if (c0_ != '"') return ReportUnexpectedCharacter(); 298 Handle<String> key = ParseJsonSymbol(); 299 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); 300 AdvanceSkipWhitespace(); 301 Handle<Object> value = ParseJsonValue(); 302 if (value.is_null()) return ReportUnexpectedCharacter(); 303 304 uint32_t index; 305 if (key->AsArrayIndex(&index)) { 306 SetOwnElement(json_object, index, value, kNonStrictMode); 307 } else if (key->Equals(isolate()->heap()->Proto_symbol())) { 308 SetPrototype(json_object, value); 309 } else { 310 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); 311 } 312 } while (MatchSkipWhiteSpace(',')); 313 if (c0_ != '}') { 314 return ReportUnexpectedCharacter(); 315 } 316 } 317 AdvanceSkipWhitespace(); 318 return json_object; 319} 320 321// Parse a JSON array. Position must be right at '['. 322template <bool seq_ascii> 323Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() { 324 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT); 325 ZoneList<Handle<Object> > elements(4); 326 ASSERT_EQ(c0_, '['); 327 328 AdvanceSkipWhitespace(); 329 if (c0_ != ']') { 330 do { 331 Handle<Object> element = ParseJsonValue(); 332 if (element.is_null()) return ReportUnexpectedCharacter(); 333 elements.Add(element); 334 } while (MatchSkipWhiteSpace(',')); 335 if (c0_ != ']') { 336 return ReportUnexpectedCharacter(); 337 } 338 } 339 AdvanceSkipWhitespace(); 340 // Allocate a fixed array with all the elements. 341 Handle<FixedArray> fast_elements = 342 isolate()->factory()->NewFixedArray(elements.length()); 343 for (int i = 0, n = elements.length(); i < n; i++) { 344 fast_elements->set(i, *elements[i]); 345 } 346 return isolate()->factory()->NewJSArrayWithElements(fast_elements); 347} 348 349 350template <bool seq_ascii> 351Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() { 352 bool negative = false; 353 int beg_pos = position_; 354 if (c0_ == '-') { 355 Advance(); 356 negative = true; 357 } 358 if (c0_ == '0') { 359 Advance(); 360 // Prefix zero is only allowed if it's the only digit before 361 // a decimal point or exponent. 362 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter(); 363 } else { 364 int i = 0; 365 int digits = 0; 366 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); 367 do { 368 i = i * 10 + c0_ - '0'; 369 digits++; 370 Advance(); 371 } while (c0_ >= '0' && c0_ <= '9'); 372 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { 373 SkipWhitespace(); 374 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate()); 375 } 376 } 377 if (c0_ == '.') { 378 Advance(); 379 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); 380 do { 381 Advance(); 382 } while (c0_ >= '0' && c0_ <= '9'); 383 } 384 if (AsciiAlphaToLower(c0_) == 'e') { 385 Advance(); 386 if (c0_ == '-' || c0_ == '+') Advance(); 387 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); 388 do { 389 Advance(); 390 } while (c0_ >= '0' && c0_ <= '9'); 391 } 392 int length = position_ - beg_pos; 393 double number; 394 if (seq_ascii) { 395 Vector<const char> chars(seq_source_->GetChars() + beg_pos, length); 396 number = StringToDouble(isolate()->unicode_cache(), 397 chars, 398 NO_FLAGS, // Hex, octal or trailing junk. 399 OS::nan_value()); 400 } else { 401 Vector<char> buffer = Vector<char>::New(length); 402 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); 403 Vector<const char> result = 404 Vector<const char>(reinterpret_cast<const char*>(buffer.start()), 405 length); 406 number = StringToDouble(isolate()->unicode_cache(), 407 result, 408 NO_FLAGS, // Hex, octal or trailing junk. 409 0.0); 410 buffer.Dispose(); 411 } 412 SkipWhitespace(); 413 return isolate()->factory()->NewNumber(number); 414} 415 416 417template <typename StringType> 418inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); 419 420template <> 421inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { 422 seq_str->SeqTwoByteStringSet(i, c); 423} 424 425template <> 426inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) { 427 seq_str->SeqAsciiStringSet(i, c); 428} 429 430template <typename StringType> 431inline Handle<StringType> NewRawString(Factory* factory, int length); 432 433template <> 434inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) { 435 return factory->NewRawTwoByteString(length, NOT_TENURED); 436} 437 438template <> 439inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) { 440 return factory->NewRawAsciiString(length, NOT_TENURED); 441} 442 443 444// Scans the rest of a JSON string starting from position_ and writes 445// prefix[start..end] along with the scanned characters into a 446// sequential string of type StringType. 447template <bool seq_ascii> 448template <typename StringType, typename SinkChar> 449Handle<String> JsonParser<seq_ascii>::SlowScanJsonString( 450 Handle<String> prefix, int start, int end) { 451 int count = end - start; 452 int max_length = count + source_length_ - position_; 453 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); 454 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(), 455 length); 456 // Copy prefix into seq_str. 457 SinkChar* dest = seq_str->GetChars(); 458 String::WriteToFlat(*prefix, dest, start, end); 459 460 while (c0_ != '"') { 461 // Check for control character (0x00-0x1f) or unterminated string (<0). 462 if (c0_ < 0x20) return Handle<String>::null(); 463 if (count >= length) { 464 // We need to create a longer sequential string for the result. 465 return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count); 466 } 467 if (c0_ != '\\') { 468 // If the sink can contain UC16 characters, or source_ contains only 469 // ASCII characters, there's no need to test whether we can store the 470 // character. Otherwise check whether the UC16 source character can fit 471 // in the ASCII sink. 472 if (sizeof(SinkChar) == kUC16Size || 473 seq_ascii || 474 c0_ <= kMaxAsciiCharCode) { 475 SeqStringSet(seq_str, count++, c0_); 476 Advance(); 477 } else { 478 // StringType is SeqAsciiString and we just read a non-ASCII char. 479 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count); 480 } 481 } else { 482 Advance(); // Advance past the \. 483 switch (c0_) { 484 case '"': 485 case '\\': 486 case '/': 487 SeqStringSet(seq_str, count++, c0_); 488 break; 489 case 'b': 490 SeqStringSet(seq_str, count++, '\x08'); 491 break; 492 case 'f': 493 SeqStringSet(seq_str, count++, '\x0c'); 494 break; 495 case 'n': 496 SeqStringSet(seq_str, count++, '\x0a'); 497 break; 498 case 'r': 499 SeqStringSet(seq_str, count++, '\x0d'); 500 break; 501 case 't': 502 SeqStringSet(seq_str, count++, '\x09'); 503 break; 504 case 'u': { 505 uc32 value = 0; 506 for (int i = 0; i < 4; i++) { 507 Advance(); 508 int digit = HexValue(c0_); 509 if (digit < 0) { 510 return Handle<String>::null(); 511 } 512 value = value * 16 + digit; 513 } 514 if (sizeof(SinkChar) == kUC16Size || value <= kMaxAsciiCharCode) { 515 SeqStringSet(seq_str, count++, value); 516 break; 517 } else { 518 // StringType is SeqAsciiString and we just read a non-ASCII char. 519 position_ -= 6; // Rewind position_ to \ in \uxxxx. 520 Advance(); 521 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 522 0, 523 count); 524 } 525 } 526 default: 527 return Handle<String>::null(); 528 } 529 Advance(); 530 } 531 } 532 // Shrink seq_string length to count. 533 if (isolate()->heap()->InNewSpace(*seq_str)) { 534 isolate()->heap()->new_space()-> 535 template ShrinkStringAtAllocationBoundary<StringType>( 536 *seq_str, count); 537 } else { 538 int string_size = StringType::SizeFor(count); 539 int allocated_string_size = StringType::SizeFor(length); 540 int delta = allocated_string_size - string_size; 541 Address start_filler_object = seq_str->address() + string_size; 542 seq_str->set_length(count); 543 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); 544 } 545 ASSERT_EQ('"', c0_); 546 // Advance past the last '"'. 547 AdvanceSkipWhitespace(); 548 return seq_str; 549} 550 551 552template <bool seq_ascii> 553template <bool is_symbol> 554Handle<String> JsonParser<seq_ascii>::ScanJsonString() { 555 ASSERT_EQ('"', c0_); 556 Advance(); 557 if (c0_ == '"') { 558 AdvanceSkipWhitespace(); 559 return Handle<String>(isolate()->heap()->empty_string()); 560 } 561 int beg_pos = position_; 562 // Fast case for ASCII only without escape characters. 563 do { 564 // Check for control character (0x00-0x1f) or unterminated string (<0). 565 if (c0_ < 0x20) return Handle<String>::null(); 566 if (c0_ != '\\') { 567 if (seq_ascii || c0_ <= kMaxAsciiCharCode) { 568 Advance(); 569 } else { 570 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, 571 beg_pos, 572 position_); 573 } 574 } else { 575 return SlowScanJsonString<SeqAsciiString, char>(source_, 576 beg_pos, 577 position_); 578 } 579 } while (c0_ != '"'); 580 int length = position_ - beg_pos; 581 Handle<String> result; 582 if (seq_ascii && is_symbol) { 583 result = isolate()->factory()->LookupAsciiSymbol(seq_source_, 584 beg_pos, 585 length); 586 } else { 587 result = isolate()->factory()->NewRawAsciiString(length); 588 char* dest = SeqAsciiString::cast(*result)->GetChars(); 589 String::WriteToFlat(*source_, dest, beg_pos, position_); 590 } 591 ASSERT_EQ('"', c0_); 592 // Advance past the last '"'. 593 AdvanceSkipWhitespace(); 594 return result; 595} 596 597} } // namespace v8::internal 598 599#endif // V8_JSON_PARSER_H_ 600