scanner.cc revision 8b112d2025046f85ef7f6be087c6129c872ebad2
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 28a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#include "v8.h" 29a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 30a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#include "ast.h" 316ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#include "handles.h" 32a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#include "scanner.h" 338a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang#include "unicode-inl.h" 34a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 35a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace v8 { 36a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace internal { 37a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 38a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// ---------------------------------------------------------------------------- 39b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// BufferedUC16CharacterStreams 40b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 41b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochBufferedUC16CharacterStream::BufferedUC16CharacterStream() 42b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch : UC16CharacterStream(), 43b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pushback_limit_(NULL) { 44b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Initialize buffer as being empty. First read will fill the buffer. 
45b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_cursor_ = buffer_; 46b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_end_ = buffer_; 47b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 48a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 49b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochBufferedUC16CharacterStream::~BufferedUC16CharacterStream() { } 50a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 51b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdochvoid BufferedUC16CharacterStream::PushBack(uc32 character) { 52b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch if (character == kEndOfInput) { 53b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch pos_--; 54b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch return; 55b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch } 56b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) { 57b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // buffer_ is writable, buffer_cursor_ is const pointer. 
58b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch buffer_[--buffer_cursor_ - buffer_] = static_cast<uc16>(character); 59b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pos_--; 60b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return; 616ded16be15dd865a9b21ea304d5273c8be299c87Steve Block } 62b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch SlowPushBack(static_cast<uc16>(character)); 63a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 64a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 65a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 66b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochvoid BufferedUC16CharacterStream::SlowPushBack(uc16 character) { 67b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // In pushback mode, the end of the buffer contains pushback, 68b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // and the start of the buffer (from buffer start to pushback_limit_) 69b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // contains valid data that comes just after the pushback. 70b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // We NULL the pushback_limit_ if pushing all the way back to the 71b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // start of the buffer. 72b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 73b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (pushback_limit_ == NULL) { 74b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Enter pushback mode. 75b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pushback_limit_ = buffer_end_; 76b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_end_ = buffer_ + kBufferSize; 77b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_cursor_ = buffer_end_; 78b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 791e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block // Ensure that there is room for at least one pushback. 
801e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block ASSERT(buffer_cursor_ > buffer_); 81b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ASSERT(pos_ > 0); 82b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_[--buffer_cursor_ - buffer_] = character; 83b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (buffer_cursor_ == buffer_) { 84b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pushback_limit_ = NULL; 85b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } else if (buffer_cursor_ < pushback_limit_) { 86b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pushback_limit_ = buffer_cursor_; 87b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 88a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos_--; 89a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 90a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 91a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 92b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochbool BufferedUC16CharacterStream::ReadBlock() { 931e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block buffer_cursor_ = buffer_; 94b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (pushback_limit_ != NULL) { 951e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block // Leave pushback mode. 96b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_end_ = pushback_limit_; 97b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pushback_limit_ = NULL; 981e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block // If there were any valid characters left at the 991e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block // start of the buffer, use those. 1001e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block if (buffer_cursor_ < buffer_end_) return true; 1011e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block // Otherwise read a new block. 
102a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 103b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned length = FillBuffer(pos_, kBufferSize); 104b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_end_ = buffer_ + length; 105b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return length > 0; 106b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 107b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 108b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 109b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) { 110b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Leave pushback mode (i.e., ignore that there might be valid data 111b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // in the buffer before the pushback_limit_ point). 112b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pushback_limit_ = NULL; 113b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return BufferSeekForward(delta); 114b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 115b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 116b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// ---------------------------------------------------------------------------- 117b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// GenericStringUC16CharacterStream 118b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 119b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 120b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochGenericStringUC16CharacterStream::GenericStringUC16CharacterStream( 121b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch Handle<String> data, 122b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned start_position, 123b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned end_position) 124b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch : string_(data), 125b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch length_(end_position) { 
126b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ASSERT(end_position >= start_position); 127b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_cursor_ = buffer_; 128b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_end_ = buffer_; 129b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pos_ = start_position; 130b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 131b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 132b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 133b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochGenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { } 134b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 135b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 136b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) { 137b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned old_pos = pos_; 138b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pos_ = Min(pos_ + delta, length_); 139b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ReadBlock(); 140b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return pos_ - old_pos; 141a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 142a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 143a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 144b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos, 145b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned length) { 146b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (from_pos >= length_) return 0; 147b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (from_pos + length > length_) { 148b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch length = length_ - from_pos; 149b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 150b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + 
length); 151b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return length; 152b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 153b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 154b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 155b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// ---------------------------------------------------------------------------- 156b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// Utf8ToUC16CharacterStream 157b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochUtf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data, 158b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned length) 159b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch : BufferedUC16CharacterStream(), 160b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_data_(data), 161b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_data_length_(length), 162b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_data_pos_(0), 163b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_character_position_(0) { 164b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ReadBlock(); 165b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 166b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 167b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 168b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochUtf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { } 169b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 170b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 171b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) { 172b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned old_pos = pos_; 173b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned target_pos = pos_ + delta; 174b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch SetRawPosition(target_pos); 175b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pos_ = raw_character_position_; 
176b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ReadBlock(); 177b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return pos_ - old_pos; 178b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 179b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 180b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 181b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position, 182b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned length) { 183b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch static const unibrow::uchar kMaxUC16Character = 0xffff; 184b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch SetRawPosition(char_position); 185b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (raw_character_position_ != char_position) { 186b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // char_position was not a valid position in the stream (hit the end 187b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // while spooling to it). 
188b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return 0u; 189b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 190b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned i = 0; 191b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch while (i < length) { 192b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (raw_data_pos_ == raw_data_length_) break; 193b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unibrow::uchar c = raw_data_[raw_data_pos_]; 194b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (c <= unibrow::Utf8::kMaxOneByteChar) { 195b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_data_pos_++; 196b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } else { 197b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_, 198b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_data_length_ - raw_data_pos_, 199b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch &raw_data_pos_); 200b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Don't allow characters outside of the BMP. 
201b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (c > kMaxUC16Character) { 202b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch c = unibrow::Utf8::kBadChar; 203b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 204b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 205b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_[i++] = static_cast<uc16>(c); 206b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 207b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_character_position_ = char_position + i; 208b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return i; 209b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 210b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 211b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 212b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic const byte kUtf8MultiByteMask = 0xC0; 213b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic const byte kUtf8MultiByteCharStart = 0xC0; 214b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic const byte kUtf8MultiByteCharFollower = 0x80; 215b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 216b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 217b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch#ifdef DEBUG 218b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic bool IsUtf8MultiCharacterStart(byte first_byte) { 219b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart; 220b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 221b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch#endif 222b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 223b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 224b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic bool IsUtf8MultiCharacterFollower(byte later_byte) { 225b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; 226b0fe1620dcb4135ac3ab2d66ff93072373911299Ben 
Murdoch} 227b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 228b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 229b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// Move the cursor back to point at the preceding UTF-8 character start 230b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// in the buffer. 231b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { 232b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch byte character = buffer[--*cursor]; 233b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (character > unibrow::Utf8::kMaxOneByteChar) { 234b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ASSERT(IsUtf8MultiCharacterFollower(character)); 235b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Last byte of a multi-byte character encoding. Step backwards until 236b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // pointing to the first byte of the encoding, recognized by having the 237b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // top two bits set. 238b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } 239b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor])); 240b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 241b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 242b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 243b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 244b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// Move the cursor forward to point at the next following UTF-8 character start 245b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// in the buffer. 
246b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { 247b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch byte character = buffer[(*cursor)++]; 248b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (character > unibrow::Utf8::kMaxOneByteChar) { 249b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // First character of a multi-byte character encoding. 250b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // The number of most-significant one-bits determines the length of the 251b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // encoding: 252b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // 110..... - (0xCx, 0xDx) one additional byte (minimum). 253b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // 1110.... - (0xEx) two additional bytes. 254b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // 11110... - (0xFx) three additional bytes (maximum). 255b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ASSERT(IsUtf8MultiCharacterStart(character)); 256b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Additional bytes is: 257b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // 1 if value in range 0xC0 .. 0xDF. 258b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // 2 if value in range 0xE0 .. 0xEF. 259b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // 3 if value in range 0xF0 .. 0xF7. 260b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Encode that in a single value. 
261b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch unsigned additional_bytes = 262b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; 263b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch *cursor += additional_bytes; 264b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); 265b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 266b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 267b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 268b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 269b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochvoid Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) { 270b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (raw_character_position_ > target_position) { 271b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Spool backwards in utf8 buffer. 272b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch do { 273b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch Utf8CharacterBack(raw_data_, &raw_data_pos_); 274b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_character_position_--; 275b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } while (raw_character_position_ > target_position); 276b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return; 277b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 278b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Spool forwards in the utf8 buffer. 
279b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch while (raw_character_position_ < target_position) { 280b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch if (raw_data_pos_ == raw_data_length_) return; 281b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch Utf8CharacterForward(raw_data_, &raw_data_pos_); 282b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_character_position_++; 283b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch } 284b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch} 285b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 286b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 287b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// ---------------------------------------------------------------------------- 288b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// ExternalTwoByteStringUC16CharacterStream 289b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 290b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochExternalTwoByteStringUC16CharacterStream:: 291b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ~ExternalTwoByteStringUC16CharacterStream() { } 292b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 293b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 294b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochExternalTwoByteStringUC16CharacterStream 295b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ::ExternalTwoByteStringUC16CharacterStream( 296b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch Handle<ExternalTwoByteString> data, 297b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int start_position, 298b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int end_position) 299b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch : UC16CharacterStream(), 300b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch source_(data), 301b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch raw_data_(data->GetTwoByteData(start_position)) { 302b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_cursor_ = raw_data_, 
303b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch buffer_end_ = raw_data_ + (end_position - start_position); 304b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch pos_ = start_position; 305a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 307a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 30880d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen// ---------------------------------------------------------------------------- 30980d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen// Scanner::LiteralScope 31080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen 31180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian MonsenScanner::LiteralScope::LiteralScope(Scanner* self) 31280d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen : scanner_(self), complete_(false) { 31380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen self->StartLiteral(); 31480d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen} 31580d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen 31680d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen 31780d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian MonsenScanner::LiteralScope::~LiteralScope() { 31880d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen if (!complete_) scanner_->DropLiteral(); 31980d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen} 32080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen 32180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen 32280d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsenvoid Scanner::LiteralScope::Complete() { 32380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen scanner_->TerminateLiteral(); 32480d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen complete_ = true; 32580d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen} 32680d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen 327b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 328d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block// 
---------------------------------------------------------------------------- 3298a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang// V8JavaScriptScanner 3308a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang 331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3329fac840a46e8b7e26894f4792ba26dde14c56b04Steve Blockvoid V8JavaScriptScanner::Initialize(UC16CharacterStream* source) { 333b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch source_ = source; 334b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // Need to capture identifiers in order to recognize "get" and "set" 335b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // in object literals. 3368a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang Init(); 3378a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang // Skip initial whitespace allowing HTML comment ends just like 3388a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang // after a newline and scan first token. 3398a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang has_line_terminator_before_next_ = true; 3408a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang SkipWhiteSpace(); 3418a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang Scan(); 3426ded16be15dd865a9b21ea304d5273c8be299c87Steve Block} 3436ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 3446ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 3458a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang// ---------------------------------------------------------------------------- 3468a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang// JsonScanner 347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3488b112d2025046f85ef7f6be087c6129c872ebad2Ben MurdochJsonScanner::JsonScanner(UnicodeCache* unicode_cache) 3498b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch : Scanner(unicode_cache) { } 350a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3518a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang 
352b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochvoid JsonScanner::Initialize(UC16CharacterStream* source) { 353b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch source_ = source; 3548a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang Init(); 3558a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang // Skip initial whitespace. 3568a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang SkipJsonWhiteSpace(); 3578a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang // Preload first token as look-ahead. 3588a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang ScanJson(); 359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3628a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::Next() { 363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // BUG 1215673: Find a thread safe way to set a stack limit in 364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // pre-parse mode. Otherwise, we cannot safely pre-parse from other 365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // threads. 366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block current_ = next_; 367a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Check for stack-overflow before returning any tokens. 368b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch ScanJson(); 369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return current_.token; 370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3738a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wangbool JsonScanner::SkipJsonWhiteSpace() { 3744515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke int start_position = source_pos(); 3754515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke // JSON WhiteSpace is tab, carrige-return, newline and space. 
3764515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') { 3774515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 3784515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 3794515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke return source_pos() != start_position; 3804515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke} 3814515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 3824515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 3838a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wangvoid JsonScanner::ScanJson() { 3849fac840a46e8b7e26894f4792ba26dde14c56b04Steve Block next_.literal_chars = NULL; 3854515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Token::Value token; 3864515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke do { 3874515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke // Remember the position of the next token 3884515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke next_.location.beg_pos = source_pos(); 3894515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke switch (c0_) { 3904515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '\t': 3914515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '\r': 3924515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '\n': 3934515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case ' ': 3944515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 3954515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::WHITESPACE; 3964515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 3974515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '{': 3984515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 3994515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::LBRACE; 4004515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4014515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '}': 4024515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 
4034515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::RBRACE; 4044515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4054515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '[': 4064515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4074515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::LBRACK; 4084515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4094515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case ']': 4104515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4114515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::RBRACK; 4124515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4134515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case ':': 4144515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4154515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::COLON; 4164515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4174515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case ',': 4184515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4194515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::COMMA; 4204515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4214515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '"': 4224515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = ScanJsonString(); 4234515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4244515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '-': 4254515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '0': 4264515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '1': 4274515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '2': 4284515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '3': 4294515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '4': 4304515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '5': 4314515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '6': 4324515c472dc3e5ed2448a564600976759e569a0a8Leon 
Clarke case '7': 4334515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '8': 4344515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '9': 4354515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = ScanJsonNumber(); 4364515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4374515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 't': 4384515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); 4394515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4404515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 'f': 4414515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); 4424515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4434515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 'n': 4444515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = ScanJsonIdentifier("null", Token::NULL_LITERAL); 4454515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4464515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke default: 4474515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ < 0) { 4484515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4494515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Token::EOS; 4504515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } else { 4514515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4524515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke token = Select(Token::ILLEGAL); 4534515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 4544515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 4554515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } while (token == Token::WHITESPACE); 4564515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 4574515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke next_.location.end_pos = source_pos(); 4584515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke next_.token = token; 4594515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke} 
4604515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 4614515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 4628a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::ScanJsonString() { 4634515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke ASSERT_EQ('"', c0_); 4644515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 46580d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen LiteralScope literal(this); 4669fac840a46e8b7e26894f4792ba26dde14c56b04Steve Block while (c0_ != '"') { 4674515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke // Check for control character (0x00-0x1f) or unterminated string (<0). 4684515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ < 0x20) return Token::ILLEGAL; 4694515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ != '\\') { 4708a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralCharAdvance(); 4714515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } else { 4724515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4734515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke switch (c0_) { 4744515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '"': 4754515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '\\': 4764515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case '/': 4778a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralChar(c0_); 4784515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4794515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 'b': 4808a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralChar('\x08'); 4814515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4824515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 'f': 4838a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralChar('\x0c'); 4844515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4854515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 'n': 4868a31eba00023874d4a1dcdc5f411cc4336776874Shimeng 
(Simon) Wang AddLiteralChar('\x0a'); 4874515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4884515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 'r': 4898a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralChar('\x0d'); 4904515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4914515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 't': 4928a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralChar('\x09'); 4934515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 4944515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke case 'u': { 4954515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke uc32 value = 0; 4964515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke for (int i = 0; i < 4; i++) { 4974515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 4984515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke int digit = HexValue(c0_); 49980d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen if (digit < 0) { 50080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen return Token::ILLEGAL; 50180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen } 5024515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke value = value * 16 + digit; 5034515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 5048a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralChar(value); 5054515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke break; 5064515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 5074515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke default: 5084515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke return Token::ILLEGAL; 5094515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 5104515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 5114515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 5124515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 51380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen literal.Complete(); 5144515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 
5154515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke return Token::STRING; 5164515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke} 5174515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 5184515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 5198a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::ScanJsonNumber() { 52080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen LiteralScope literal(this); 5211e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block bool negative = false; 5221e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block 5231e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block if (c0_ == '-') { 5241e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block AddLiteralCharAdvance(); 5251e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block negative = true; 5261e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block } 5274515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ == '0') { 5288a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralCharAdvance(); 5294515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke // Prefix zero is only allowed if it's the only digit before 5304515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke // a decimal point or exponent. 
5314515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; 5324515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } else { 5331e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block int i = 0; 5341e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block int digits = 0; 5354515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; 5364515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke do { 5371e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block i = i * 10 + c0_ - '0'; 5381e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block digits++; 5398a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralCharAdvance(); 5404515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } while (c0_ >= '0' && c0_ <= '9'); 5411e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { 5421e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block number_ = (negative ? -i : i); 5431e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block return Token::NUMBER; 5441e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block } 5454515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 5464515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ == '.') { 5478a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralCharAdvance(); 5484515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; 5494515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke do { 5508a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralCharAdvance(); 5514515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } while (c0_ >= '0' && c0_ <= '9'); 5524515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 5539ac36c9faca11611ada13b4054edbaa0738661d0Iain Merrick if (AsciiAlphaToLower(c0_) == 'e') { 5548a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralCharAdvance(); 5558a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) 
Wang if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance(); 5564515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; 5574515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke do { 5588a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang AddLiteralCharAdvance(); 5594515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } while (c0_ >= '0' && c0_ <= '9'); 5604515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 56180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen literal.Complete(); 5621e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block ASSERT_NOT_NULL(next_.literal_chars); 5638b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch number_ = StringToDouble(unicode_cache_, 5648b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch next_.literal_chars->ascii_literal(), 5651e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block NO_FLAGS, // Hex, octal or trailing junk. 5661e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block OS::nan_value()); 5674515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke return Token::NUMBER; 5684515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke} 5694515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 5704515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 5718a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::ScanJsonIdentifier(const char* text, 5728a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang Token::Value token) { 57380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen LiteralScope literal(this); 5744515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke while (*text != '\0') { 5754515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke if (c0_ != *text) return Token::ILLEGAL; 5764515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke Advance(); 5774515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke text++; 5784515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke } 5798b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch if (unicode_cache_->IsIdentifierPart(c0_)) 
return Token::ILLEGAL; 58080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen literal.Complete(); 5814515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke return token; 5824515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke} 5834515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 5844515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke 585a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} } // namespace v8::internal 586