// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
28a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#include "v8.h"
29a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
30a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#include "ast.h"
316ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#include "handles.h"
32a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#include "scanner.h"
338a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang#include "unicode-inl.h"
34a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
35a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace v8 {
36a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace internal {
37a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
// ----------------------------------------------------------------------------
// BufferedUC16CharacterStreams
40b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
41b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochBufferedUC16CharacterStream::BufferedUC16CharacterStream()
42b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    : UC16CharacterStream(),
43b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      pushback_limit_(NULL) {
44b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // Initialize buffer as being empty. First read will fill the buffer.
45b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_cursor_ = buffer_;
46b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_end_ = buffer_;
47b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
48a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
49b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochBufferedUC16CharacterStream::~BufferedUC16CharacterStream() { }
50a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
51b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdochvoid BufferedUC16CharacterStream::PushBack(uc32 character) {
52b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch  if (character == kEndOfInput) {
53b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch    pos_--;
54b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch    return;
55b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch  }
56b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) {
57b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // buffer_ is writable, buffer_cursor_ is const pointer.
58b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch    buffer_[--buffer_cursor_ - buffer_] = static_cast<uc16>(character);
59b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    pos_--;
60b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    return;
616ded16be15dd865a9b21ea304d5273c8be299c87Steve Block  }
62b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch  SlowPushBack(static_cast<uc16>(character));
63a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}
64a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
65a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
66b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochvoid BufferedUC16CharacterStream::SlowPushBack(uc16 character) {
67b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // In pushback mode, the end of the buffer contains pushback,
68b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // and the start of the buffer (from buffer start to pushback_limit_)
69b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // contains valid data that comes just after the pushback.
70b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // We NULL the pushback_limit_ if pushing all the way back to the
71b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // start of the buffer.
72b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
73b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (pushback_limit_ == NULL) {
74b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // Enter pushback mode.
75b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    pushback_limit_ = buffer_end_;
76b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    buffer_end_ = buffer_ + kBufferSize;
77b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    buffer_cursor_ = buffer_end_;
78b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
791e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block  // Ensure that there is room for at least one pushback.
801e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block  ASSERT(buffer_cursor_ > buffer_);
81b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  ASSERT(pos_ > 0);
82b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_[--buffer_cursor_ - buffer_] = character;
83b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (buffer_cursor_ == buffer_) {
84b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    pushback_limit_ = NULL;
85b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  } else if (buffer_cursor_ < pushback_limit_) {
86b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    pushback_limit_ = buffer_cursor_;
87b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
88a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  pos_--;
89a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}
90a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
91a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
92b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochbool BufferedUC16CharacterStream::ReadBlock() {
931e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block  buffer_cursor_ = buffer_;
94b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (pushback_limit_ != NULL) {
951e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    // Leave pushback mode.
96b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    buffer_end_ = pushback_limit_;
97b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    pushback_limit_ = NULL;
981e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    // If there were any valid characters left at the
991e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    // start of the buffer, use those.
1001e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    if (buffer_cursor_ < buffer_end_) return true;
1011e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    // Otherwise read a new block.
102a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  }
103b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  unsigned length = FillBuffer(pos_, kBufferSize);
104b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_end_ = buffer_ + length;
105b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  return length > 0;
106b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
107b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
108b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
109b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) {
110b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // Leave pushback mode (i.e., ignore that there might be valid data
111b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // in the buffer before the pushback_limit_ point).
112b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  pushback_limit_ = NULL;
113b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  return BufferSeekForward(delta);
114b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
115b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
// ----------------------------------------------------------------------------
// GenericStringUC16CharacterStream
118b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
119b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
120b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochGenericStringUC16CharacterStream::GenericStringUC16CharacterStream(
121b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    Handle<String> data,
122b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    unsigned start_position,
123b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    unsigned end_position)
124b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    : string_(data),
125b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      length_(end_position) {
126b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  ASSERT(end_position >= start_position);
127b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_cursor_ = buffer_;
128b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_end_ = buffer_;
129b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  pos_ = start_position;
130b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
131b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
132b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
133b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochGenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { }
134b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
135b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
136b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) {
137b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  unsigned old_pos = pos_;
138b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  pos_ = Min(pos_ + delta, length_);
139b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  ReadBlock();
140b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  return pos_ - old_pos;
141a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}
142a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
143a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
144b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos,
145b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch                                                      unsigned length) {
146b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (from_pos >= length_) return 0;
147b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (from_pos + length > length_) {
148b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    length = length_ - from_pos;
149b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
150b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length);
151b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  return length;
152b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
153b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
154b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
// ----------------------------------------------------------------------------
// Utf8ToUC16CharacterStream
157b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochUtf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data,
158b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch                                                     unsigned length)
159b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    : BufferedUC16CharacterStream(),
160b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      raw_data_(data),
161b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      raw_data_length_(length),
162b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      raw_data_pos_(0),
163b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      raw_character_position_(0) {
164b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  ReadBlock();
165b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
166b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
167b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
168b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochUtf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { }
169b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
170b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
171b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) {
172b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  unsigned old_pos = pos_;
173b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  unsigned target_pos = pos_ + delta;
174b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  SetRawPosition(target_pos);
175b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  pos_ = raw_character_position_;
176b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  ReadBlock();
177b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  return pos_ - old_pos;
178b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
179b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
180b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
181b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochunsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position,
182b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch                                               unsigned length) {
183b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  static const unibrow::uchar kMaxUC16Character = 0xffff;
184b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  SetRawPosition(char_position);
185b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (raw_character_position_ != char_position) {
186b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // char_position was not a valid position in the stream (hit the end
187b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // while spooling to it).
188b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    return 0u;
189b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
190b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  unsigned i = 0;
191b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  while (i < length) {
192b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    if (raw_data_pos_ == raw_data_length_) break;
193b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    unibrow::uchar c = raw_data_[raw_data_pos_];
194b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    if (c <= unibrow::Utf8::kMaxOneByteChar) {
195b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      raw_data_pos_++;
196b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    } else {
197b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      c =  unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
198b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch                                         raw_data_length_ - raw_data_pos_,
199b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch                                         &raw_data_pos_);
200b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      // Don't allow characters outside of the BMP.
201b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      if (c > kMaxUC16Character) {
202b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch        c = unibrow::Utf8::kBadChar;
203b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      }
204b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    }
205b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    buffer_[i++] = static_cast<uc16>(c);
206b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
207b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  raw_character_position_ = char_position + i;
208b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  return i;
209b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
210b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
211b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
212b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic const byte kUtf8MultiByteMask = 0xC0;
213b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic const byte kUtf8MultiByteCharStart = 0xC0;
214b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic const byte kUtf8MultiByteCharFollower = 0x80;
215b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
216b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
#ifdef DEBUG
// Debug-only helper: true when the top two bits of |first_byte| are both
// set, i.e. it matches kUtf8MultiByteCharStart under kUtf8MultiByteMask.
static bool IsUtf8MultiCharacterStart(byte first_byte) {
  const int top_bits = first_byte & kUtf8MultiByteMask;
  return top_bits == kUtf8MultiByteCharStart;
}
#endif
222b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
223b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
224b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic bool IsUtf8MultiCharacterFollower(byte later_byte) {
225b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower;
226b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
227b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
228b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
229b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// Move the cursor back to point at the preceding UTF-8 character start
230b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// in the buffer.
231b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {
232b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  byte character = buffer[--*cursor];
233b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (character > unibrow::Utf8::kMaxOneByteChar) {
234b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    ASSERT(IsUtf8MultiCharacterFollower(character));
235b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // Last byte of a multi-byte character encoding. Step backwards until
236b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // pointing to the first byte of the encoding, recognized by having the
237b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // top two bits set.
238b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }
239b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor]));
240b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
241b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
242b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
243b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
244b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// Move the cursor forward to point at the next following UTF-8 character start
245b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch// in the buffer.
246b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochstatic inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
247b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  byte character = buffer[(*cursor)++];
248b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (character > unibrow::Utf8::kMaxOneByteChar) {
249b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // First character of a multi-byte character encoding.
250b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // The number of most-significant one-bits determines the length of the
251b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // encoding:
252b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    //  110..... - (0xCx, 0xDx) one additional byte (minimum).
253b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    //  1110.... - (0xEx) two additional bytes.
254b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    //  11110... - (0xFx) three additional bytes (maximum).
255b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    ASSERT(IsUtf8MultiCharacterStart(character));
256b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // Additional bytes is:
257b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // 1 if value in range 0xC0 .. 0xDF.
258b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // 2 if value in range 0xE0 .. 0xEF.
259b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // 3 if value in range 0xF0 .. 0xF7.
260b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // Encode that in a single value.
261b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    unsigned additional_bytes =
262b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch        ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;
263b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    *cursor += additional_bytes;
264b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));
265b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
266b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
267b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
268b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
269b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochvoid Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) {
270b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  if (raw_character_position_ > target_position) {
271b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    // Spool backwards in utf8 buffer.
272b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    do {
273b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      Utf8CharacterBack(raw_data_, &raw_data_pos_);
274b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      raw_character_position_--;
275b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    } while (raw_character_position_ > target_position);
276b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    return;
277b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
278b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // Spool forwards in the utf8 buffer.
279b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  while (raw_character_position_ < target_position) {
280b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    if (raw_data_pos_ == raw_data_length_) return;
281b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    Utf8CharacterForward(raw_data_, &raw_data_pos_);
282b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    raw_character_position_++;
283b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  }
284b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}
285b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
286b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
// ----------------------------------------------------------------------------
// ExternalTwoByteStringUC16CharacterStream
289b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
290b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochExternalTwoByteStringUC16CharacterStream::
291b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    ~ExternalTwoByteStringUC16CharacterStream() { }
292b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
293b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
294b0fe1620dcb4135ac3ab2d66ff93072373911299Ben MurdochExternalTwoByteStringUC16CharacterStream
295b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    ::ExternalTwoByteStringUC16CharacterStream(
296b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch        Handle<ExternalTwoByteString> data,
297b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch        int start_position,
298b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch        int end_position)
299b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch    : UC16CharacterStream(),
300b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      source_(data),
301b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch      raw_data_(data->GetTwoByteData(start_position)) {
302b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_cursor_ = raw_data_,
303b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  buffer_end_ = raw_data_ + (end_position - start_position);
304b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  pos_ = start_position;
305a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}
306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
307a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
// ----------------------------------------------------------------------------
// Scanner::LiteralScope
31080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen
31180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian MonsenScanner::LiteralScope::LiteralScope(Scanner* self)
31280d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen    : scanner_(self), complete_(false) {
31380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  self->StartLiteral();
31480d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen}
31580d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen
31680d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen
31780d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian MonsenScanner::LiteralScope::~LiteralScope() {
31880d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  if (!complete_) scanner_->DropLiteral();
31980d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen}
32080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen
32180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen
32280d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsenvoid Scanner::LiteralScope::Complete() {
32380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  scanner_->TerminateLiteral();
32480d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  complete_ = true;
32580d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen}
32680d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen
327b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch
// ----------------------------------------------------------------------------
// V8JavaScriptScanner
3308a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang
331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
3329fac840a46e8b7e26894f4792ba26dde14c56b04Steve Blockvoid V8JavaScriptScanner::Initialize(UC16CharacterStream* source) {
333b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  source_ = source;
334b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // Need to capture identifiers in order to recognize "get" and "set"
335b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  // in object literals.
3368a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  Init();
3378a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  // Skip initial whitespace allowing HTML comment ends just like
3388a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  // after a newline and scan first token.
3398a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  has_line_terminator_before_next_ = true;
3408a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  SkipWhiteSpace();
3418a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  Scan();
3426ded16be15dd865a9b21ea304d5273c8be299c87Steve Block}
3436ded16be15dd865a9b21ea304d5273c8be299c87Steve Block
3446ded16be15dd865a9b21ea304d5273c8be299c87Steve Block
// ----------------------------------------------------------------------------
// JsonScanner
347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
3488b112d2025046f85ef7f6be087c6129c872ebad2Ben MurdochJsonScanner::JsonScanner(UnicodeCache* unicode_cache)
3498b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch    : Scanner(unicode_cache) { }
350a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
3518a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang
352b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochvoid JsonScanner::Initialize(UC16CharacterStream* source) {
353b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  source_ = source;
3548a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  Init();
3558a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  // Skip initial whitespace.
3568a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  SkipJsonWhiteSpace();
3578a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  // Preload first token as look-ahead.
3588a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang  ScanJson();
359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}
360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
3628a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::Next() {
363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  // BUG 1215673: Find a thread safe way to set a stack limit in
364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  // pre-parse mode. Otherwise, we cannot safely pre-parse from other
365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  // threads.
366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  current_ = next_;
367a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  // Check for stack-overflow before returning any tokens.
368b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch  ScanJson();
369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block  return current_.token;
370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}
371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block
3738a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wangbool JsonScanner::SkipJsonWhiteSpace() {
3744515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  int start_position = source_pos();
3754515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  // JSON WhiteSpace is tab, carrige-return, newline and space.
3764515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') {
3774515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    Advance();
3784515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  }
3794515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  return source_pos() != start_position;
3804515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke}
3814515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
3824515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
3838a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wangvoid JsonScanner::ScanJson() {
3849fac840a46e8b7e26894f4792ba26dde14c56b04Steve Block  next_.literal_chars = NULL;
3854515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  Token::Value token;
3864515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  do {
3874515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    // Remember the position of the next token
3884515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    next_.location.beg_pos = source_pos();
3894515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    switch (c0_) {
3904515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '\t':
3914515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '\r':
3924515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '\n':
3934515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case ' ':
3944515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        Advance();
3954515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = Token::WHITESPACE;
3964515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
3974515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '{':
3984515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        Advance();
3994515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = Token::LBRACE;
4004515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4014515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '}':
4024515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        Advance();
4034515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = Token::RBRACE;
4044515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4054515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '[':
4064515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        Advance();
4074515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = Token::LBRACK;
4084515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4094515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case ']':
4104515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        Advance();
4114515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = Token::RBRACK;
4124515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4134515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case ':':
4144515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        Advance();
4154515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = Token::COLON;
4164515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4174515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case ',':
4184515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        Advance();
4194515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = Token::COMMA;
4204515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4214515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '"':
4224515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = ScanJsonString();
4234515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4244515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '-':
4254515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '0':
4264515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '1':
4274515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '2':
4284515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '3':
4294515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '4':
4304515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '5':
4314515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '6':
4324515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '7':
4334515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '8':
4344515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case '9':
4354515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = ScanJsonNumber();
4364515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4374515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case 't':
4384515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
4394515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4404515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case 'f':
4414515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
4424515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4434515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      case 'n':
4444515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
4454515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        break;
4464515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      default:
4474515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        if (c0_ < 0) {
4484515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          Advance();
4494515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          token = Token::EOS;
4504515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        } else {
4514515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          Advance();
4524515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          token = Select(Token::ILLEGAL);
4534515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        }
4544515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    }
4554515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  } while (token == Token::WHITESPACE);
4564515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
4574515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  next_.location.end_pos = source_pos();
4584515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  next_.token = token;
4594515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke}
4604515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
4614515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
4628a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::ScanJsonString() {
4634515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  ASSERT_EQ('"', c0_);
4644515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  Advance();
46580d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  LiteralScope literal(this);
4669fac840a46e8b7e26894f4792ba26dde14c56b04Steve Block  while (c0_ != '"') {
4674515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    // Check for control character (0x00-0x1f) or unterminated string (<0).
4684515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    if (c0_ < 0x20) return Token::ILLEGAL;
4694515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    if (c0_ != '\\') {
4708a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang      AddLiteralCharAdvance();
4714515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    } else {
4724515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      Advance();
4734515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      switch (c0_) {
4744515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case '"':
4754515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case '\\':
4764515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case '/':
4778a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang          AddLiteralChar(c0_);
4784515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          break;
4794515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case 'b':
4808a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang          AddLiteralChar('\x08');
4814515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          break;
4824515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case 'f':
4838a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang          AddLiteralChar('\x0c');
4844515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          break;
4854515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case 'n':
4868a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang          AddLiteralChar('\x0a');
4874515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          break;
4884515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case 'r':
4898a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang          AddLiteralChar('\x0d');
4904515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          break;
4914515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case 't':
4928a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang          AddLiteralChar('\x09');
4934515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          break;
4944515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        case 'u': {
4954515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          uc32 value = 0;
4964515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          for (int i = 0; i < 4; i++) {
4974515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke            Advance();
4984515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke            int digit = HexValue(c0_);
49980d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen            if (digit < 0) {
50080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen              return Token::ILLEGAL;
50180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen            }
5024515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke            value = value * 16 + digit;
5034515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          }
5048a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang          AddLiteralChar(value);
5054515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          break;
5064515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        }
5074515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke        default:
5084515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke          return Token::ILLEGAL;
5094515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      }
5104515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke      Advance();
5114515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    }
5124515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  }
51380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  literal.Complete();
5144515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  Advance();
5154515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  return Token::STRING;
5164515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke}
5174515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
5184515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
5198a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::ScanJsonNumber() {
52080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  LiteralScope literal(this);
5211e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block  bool negative = false;
5221e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block
5231e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block  if (c0_ == '-') {
5241e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    AddLiteralCharAdvance();
5251e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    negative = true;
5261e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block  }
5274515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  if (c0_ == '0') {
5288a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang    AddLiteralCharAdvance();
5294515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    // Prefix zero is only allowed if it's the only digit before
5304515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    // a decimal point or exponent.
5314515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
5324515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  } else {
5331e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    int i = 0;
5341e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    int digits = 0;
5354515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
5364515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    do {
5371e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block      i = i * 10 + c0_ - '0';
5381e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block      digits++;
5398a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang      AddLiteralCharAdvance();
5404515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    } while (c0_ >= '0' && c0_ <= '9');
5411e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
5421e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block      number_ = (negative ? -i : i);
5431e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block      return Token::NUMBER;
5441e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block    }
5454515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  }
5464515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  if (c0_ == '.') {
5478a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang    AddLiteralCharAdvance();
5484515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
5494515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    do {
5508a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang      AddLiteralCharAdvance();
5514515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    } while (c0_ >= '0' && c0_ <= '9');
5524515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  }
5539ac36c9faca11611ada13b4054edbaa0738661d0Iain Merrick  if (AsciiAlphaToLower(c0_) == 'e') {
5548a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang    AddLiteralCharAdvance();
5558a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang    if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();
5564515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
5574515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    do {
5588a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang      AddLiteralCharAdvance();
5594515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    } while (c0_ >= '0' && c0_ <= '9');
5604515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  }
56180d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  literal.Complete();
5621e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block  ASSERT_NOT_NULL(next_.literal_chars);
5638b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch  number_ = StringToDouble(unicode_cache_,
5648b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch                           next_.literal_chars->ascii_literal(),
5651e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block                           NO_FLAGS,  // Hex, octal or trailing junk.
5661e0659c275bb392c045087af4f6b0d7565cb3d77Steve Block                           OS::nan_value());
5674515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  return Token::NUMBER;
5684515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke}
5694515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
5704515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
5718a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) WangToken::Value JsonScanner::ScanJsonIdentifier(const char* text,
5728a31eba00023874d4a1dcdc5f411cc4336776874Shimeng (Simon) Wang                                             Token::Value token) {
57380d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  LiteralScope literal(this);
5744515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  while (*text != '\0') {
5754515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    if (c0_ != *text) return Token::ILLEGAL;
5764515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    Advance();
5774515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke    text++;
5784515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  }
5798b112d2025046f85ef7f6be087c6129c872ebad2Ben Murdoch  if (unicode_cache_->IsIdentifierPart(c0_)) return Token::ILLEGAL;
58080d68eab642096c1a48b6474d6ec33064b0ad1f5Kristian Monsen  literal.Complete();
5814515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke  return token;
5824515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke}
5834515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
5844515c472dc3e5ed2448a564600976759e569a0a8Leon Clarke
585a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} }  // namespace v8::internal
586