155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// Copyright 2011 the V8 project authors. All rights reserved. 23484964a86451e86dcf04be9bd8c0d76ee04f081rossberg@chromium.org// Use of this source code is governed by a BSD-style license that can be 33484964a86451e86dcf04be9bd8c0d76ee04f081rossberg@chromium.org// found in the LICENSE file. 455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 5196eb601290dc49c3754da728dc58700dff2de1bmachenbach@chromium.org#include "src/v8.h" 655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 7196eb601290dc49c3754da728dc58700dff2de1bmachenbach@chromium.org#include "src/scanner-character-streams.h" 855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 9b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org#include "include/v8.h" 10196eb601290dc49c3754da728dc58700dff2de1bmachenbach@chromium.org#include "src/handles.h" 11196eb601290dc49c3754da728dc58700dff2de1bmachenbach@chromium.org#include "src/unicode-inl.h" 1255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 1355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgnamespace v8 { 1455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgnamespace internal { 1555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 16b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.orgnamespace { 17b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 18b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.orgunsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src, 19b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned* src_pos, unsigned src_length, 20b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org ScriptCompiler::StreamedSource::Encoding encoding) { 21b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (encoding == ScriptCompiler::StreamedSource::UTF8) { 22b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( 23b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org dest, length, src, src_pos, src_length); 24b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 25b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 26b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned to_fill = length; 27b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos; 28b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 29b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) { 30b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill); 31b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } else { 32b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE); 33b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org v8::internal::CopyChars<uint16_t, uint16_t>( 34b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill); 35b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 36b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org *src_pos += to_fill; 37b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org return to_fill; 38b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org} 39b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 40b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org} // namespace 41b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 42b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 4355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// ---------------------------------------------------------------------------- 44154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// BufferedUtf16CharacterStreams 4555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 46154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgBufferedUtf16CharacterStream::BufferedUtf16CharacterStream() 47154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org : Utf16CharacterStream(), 4855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pushback_limit_(NULL) { 4955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Initialize buffer as being empty. First read will fill the buffer. 5055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_cursor_ = buffer_; 5155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_end_ = buffer_; 5255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 5355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 54e0e1b0d3e70c933d36ed381d511e9fda39f2a751mstarzinger@chromium.org 55154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgBufferedUtf16CharacterStream::~BufferedUtf16CharacterStream() { } 5655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 57154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgvoid BufferedUtf16CharacterStream::PushBack(uc32 character) { 5855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (character == kEndOfInput) { 5955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pos_--; 6055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return; 6155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 6255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) { 6355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // buffer_ is writable, buffer_cursor_ is const pointer. 6455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_[--buffer_cursor_ - buffer_] = static_cast<uc16>(character); 6555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pos_--; 6655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return; 6755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 6855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org SlowPushBack(static_cast<uc16>(character)); 6955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 7055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 7155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 72154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgvoid BufferedUtf16CharacterStream::SlowPushBack(uc16 character) { 7355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // In pushback mode, the end of the buffer contains pushback, 7455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // and the start of the buffer (from buffer start to pushback_limit_) 7555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // contains valid data that comes just after the pushback. 7655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // We NULL the pushback_limit_ if pushing all the way back to the 7755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // start of the buffer. 7855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 7955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (pushback_limit_ == NULL) { 8055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Enter pushback mode. 8155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pushback_limit_ = buffer_end_; 8255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_end_ = buffer_ + kBufferSize; 8355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_cursor_ = buffer_end_; 8455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 8555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Ensure that there is room for at least one pushback. 86e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(buffer_cursor_ > buffer_); 87e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(pos_ > 0); 8855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_[--buffer_cursor_ - buffer_] = character; 8955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (buffer_cursor_ == buffer_) { 9055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pushback_limit_ = NULL; 9155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } else if (buffer_cursor_ < pushback_limit_) { 9255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pushback_limit_ = buffer_cursor_; 9355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 9455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pos_--; 9555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 9655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 9755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 98154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgbool BufferedUtf16CharacterStream::ReadBlock() { 9955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_cursor_ = buffer_; 10055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (pushback_limit_ != NULL) { 10155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Leave pushback mode. 10255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_end_ = pushback_limit_; 10355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pushback_limit_ = NULL; 10455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // If there were any valid characters left at the 10555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // start of the buffer, use those. 10655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (buffer_cursor_ < buffer_end_) return true; 10755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Otherwise read a new block. 10855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 10912e05e8fde625d746b998a15049e8487c43a3b17machenbach@chromium.org unsigned length = FillBuffer(pos_); 11055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_end_ = buffer_ + length; 11155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return length > 0; 11255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 11355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 11455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 115154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgunsigned BufferedUtf16CharacterStream::SlowSeekForward(unsigned delta) { 11655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Leave pushback mode (i.e., ignore that there might be valid data 11755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // in the buffer before the pushback_limit_ point). 11855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pushback_limit_ = NULL; 11955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return BufferSeekForward(delta); 12055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 12155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 122e0e1b0d3e70c933d36ed381d511e9fda39f2a751mstarzinger@chromium.org 12355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// ---------------------------------------------------------------------------- 124154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// GenericStringUtf16CharacterStream 12555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 12655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 127154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgGenericStringUtf16CharacterStream::GenericStringUtf16CharacterStream( 12855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org Handle<String> data, 12955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org unsigned start_position, 13055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org unsigned end_position) 13155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org : string_(data), 13255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org length_(end_position) { 133e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(end_position >= start_position); 13455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pos_ = start_position; 13555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 13655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 13755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 138154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgGenericStringUtf16CharacterStream::~GenericStringUtf16CharacterStream() { } 13955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 14055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 141154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgunsigned GenericStringUtf16CharacterStream::BufferSeekForward(unsigned delta) { 14255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org unsigned old_pos = pos_; 14355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pos_ = Min(pos_ + delta, length_); 14455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org ReadBlock(); 14555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return pos_ - old_pos; 14655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 14755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 14855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 14912e05e8fde625d746b998a15049e8487c43a3b17machenbach@chromium.orgunsigned GenericStringUtf16CharacterStream::FillBuffer(unsigned from_pos) { 15055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (from_pos >= length_) return 0; 15112e05e8fde625d746b998a15049e8487c43a3b17machenbach@chromium.org unsigned length = kBufferSize; 15255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (from_pos + length > length_) { 15355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org length = length_ - from_pos; 15455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 15555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length); 15655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return length; 15755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 15855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 15955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 16055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// ---------------------------------------------------------------------------- 161154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// Utf8ToUtf16CharacterStream 162154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgUtf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data, 163154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org unsigned length) 164154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org : BufferedUtf16CharacterStream(), 16555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_data_(data), 16655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_data_length_(length), 16755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_data_pos_(0), 16855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_character_position_(0) { 16955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org ReadBlock(); 17055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 17155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 17255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 173154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgUtf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } 17455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 17555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 176b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.orgunsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length, 177b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org const byte* src, 178b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned* src_pos, 179b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned src_length) { 180b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org static const unibrow::uchar kMaxUtf16Character = 0xffff; 181b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned i = 0; 182b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer 183b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // one character early (in the normal case), because we need to have at least 184b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // two free spaces in the buffer to be sure that the next character will fit. 185b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org while (i < length - 1) { 186b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (*src_pos == src_length) break; 187b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unibrow::uchar c = src[*src_pos]; 188b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (c <= unibrow::Utf8::kMaxOneByteChar) { 189b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org *src_pos = *src_pos + 1; 190b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } else { 191b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos, 192b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org src_pos); 193b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 194b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (c > kMaxUtf16Character) { 195b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org dest[i++] = unibrow::Utf16::LeadSurrogate(c); 196b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org dest[i++] = unibrow::Utf16::TrailSurrogate(c); 197b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } else { 198b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org dest[i++] = static_cast<uc16>(c); 199b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 200b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 201b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org return i; 202b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org} 203b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 204b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 205154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgunsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) { 20655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org unsigned old_pos = pos_; 20755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org unsigned target_pos = pos_ + delta; 20855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org SetRawPosition(target_pos); 20955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pos_ = raw_character_position_; 21055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org ReadBlock(); 21155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return pos_ - old_pos; 21255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 21355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 21455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 21512e05e8fde625d746b998a15049e8487c43a3b17machenbach@chromium.orgunsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) { 21655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org SetRawPosition(char_position); 21755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (raw_character_position_ != char_position) { 21855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // char_position was not a valid position in the stream (hit the end 21955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // while spooling to it). 22055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return 0u; 22155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 222b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_, 223b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org raw_data_length_); 22455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_character_position_ = char_position + i; 22555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return i; 22655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 22755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 22855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 22955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgstatic const byte kUtf8MultiByteMask = 0xC0; 23055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgstatic const byte kUtf8MultiByteCharFollower = 0x80; 23155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 23255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 23355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org#ifdef DEBUG 234afbdadc5f06365a7889e7c1c1fdb7dbf596cce68machenbach@chromium.orgstatic const byte kUtf8MultiByteCharStart = 0xC0; 23555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgstatic bool IsUtf8MultiCharacterStart(byte first_byte) { 23655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart; 23755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 23855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org#endif 23955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 24055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 24155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgstatic bool IsUtf8MultiCharacterFollower(byte later_byte) { 24255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; 24355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 24455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 24555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 24655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// Move the cursor back to point at the preceding UTF-8 character start 24755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// in the buffer. 24855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgstatic inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { 24955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org byte character = buffer[--*cursor]; 25055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (character > unibrow::Utf8::kMaxOneByteChar) { 251e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(IsUtf8MultiCharacterFollower(character)); 25255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Last byte of a multi-byte character encoding. Step backwards until 25355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // pointing to the first byte of the encoding, recognized by having the 25455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // top two bits set. 25555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } 256e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(IsUtf8MultiCharacterStart(buffer[*cursor])); 25755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 25855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 25955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 26055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 26155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// Move the cursor forward to point at the next following UTF-8 character start 26255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// in the buffer. 26355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.orgstatic inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { 26455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org byte character = buffer[(*cursor)++]; 26555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (character > unibrow::Utf8::kMaxOneByteChar) { 26655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // First character of a multi-byte character encoding. 26755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // The number of most-significant one-bits determines the length of the 26855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // encoding: 26955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // 110..... - (0xCx, 0xDx) one additional byte (minimum). 27055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // 1110.... - (0xEx) two additional bytes. 27155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // 11110... - (0xFx) three additional bytes (maximum). 272e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(IsUtf8MultiCharacterStart(character)); 27355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Additional bytes is: 27455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // 1 if value in range 0xC0 .. 0xDF. 27555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // 2 if value in range 0xE0 .. 0xEF. 27655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // 3 if value in range 0xF0 .. 0xF7. 27755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Encode that in a single value. 27855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org unsigned additional_bytes = 27955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; 28055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org *cursor += additional_bytes; 281e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); 28255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 28355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 28455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 28555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 286154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// This can't set a raw position between two surrogate pairs, since there 287154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// is no position in the UTF8 stream that corresponds to that. This assumes 288154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If 289154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// it is illegally coded as two 3 byte sequences then there is no problem here. 290154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgvoid Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) { 29155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (raw_character_position_ > target_position) { 29255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Spool backwards in utf8 buffer. 29355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org do { 294154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org int old_pos = raw_data_pos_; 29555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org Utf8CharacterBack(raw_data_, &raw_data_pos_); 29655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_character_position_--; 297e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(old_pos - raw_data_pos_ <= 4); 298154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // Step back over both code units for surrogate pairs. 299154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org if (old_pos - raw_data_pos_ == 4) raw_character_position_--; 30055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } while (raw_character_position_ > target_position); 301154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // No surrogate pair splitting. 302e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(raw_character_position_ == target_position); 30355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org return; 30455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 30555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org // Spool forwards in the utf8 buffer. 30655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org while (raw_character_position_ < target_position) { 30755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org if (raw_data_pos_ == raw_data_length_) return; 308154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org int old_pos = raw_data_pos_; 30955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org Utf8CharacterForward(raw_data_, &raw_data_pos_); 31055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_character_position_++; 311e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(raw_data_pos_ - old_pos <= 4); 312154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org if (raw_data_pos_ - old_pos == 4) raw_character_position_++; 31355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org } 314154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org // No surrogate pair splitting. 315e3c177a423baa3c30225c4e422b6f6c76d38b951machenbach@chromium.org DCHECK(raw_character_position_ == target_position); 31655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 31755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 31855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 319b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.orgunsigned ExternalStreamingStream::FillBuffer(unsigned position) { 320b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Ignore "position" which is the position in the decoded data. Instead, 321b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // ExternalStreamingStream keeps track of the position in the raw data. 322b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned data_in_buffer = 0; 323b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Note that the UTF-8 decoder might not be able to fill the buffer 324b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // completely; it will typically leave the last character empty (see 325b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Utf8ToUtf16CharacterStream::CopyChars). 326b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org while (data_in_buffer < kBufferSize - 1) { 327b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (current_data_ == NULL) { 328b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // GetSomeData will wait until the embedder has enough data. Here's an 329b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // interface between the API which uses size_t (which is the correct type 330b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // here) and the internal parts which use unsigned. TODO(marja): make the 331b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // internal parts use size_t too. 332b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_length_ = 333b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org static_cast<unsigned>(source_stream_->GetMoreData(¤t_data_)); 334b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_offset_ = 0; 335b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org bool data_ends = current_data_length_ == 0; 336b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 337b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // A caveat: a data chunk might end with bytes from an incomplete UTF-8 338b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // character (the rest of the bytes will be in the next chunk). 339b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (encoding_ == ScriptCompiler::StreamedSource::UTF8) { 340b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org HandleUtf8SplitCharacters(&data_in_buffer); 341b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (!data_ends && current_data_offset_ == current_data_length_) { 342b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // The data stream didn't end, but we used all the data in the 343b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // chunk. This will only happen when the chunk was really small. We 344b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // don't handle the case where a UTF-8 character is split over several 345b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // chunks; in that case V8 won't crash, but it will be a parse error. 346b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org delete[] current_data_; 347b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_ = NULL; 348b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_length_ = 0; 349b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_offset_ = 0; 350b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org continue; // Request a new chunk. 351b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 352b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 353b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 354b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Did the data stream end? 355b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (data_ends) { 356b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org DCHECK(utf8_split_char_buffer_length_ == 0); 357b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org return data_in_buffer; 358b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 359b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 360b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 361b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Fill the buffer from current_data_. 362b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned new_offset = 0; 363b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned new_chars_in_buffer = 364b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer, 365b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_ + current_data_offset_, &new_offset, 366b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_length_ - current_data_offset_, encoding_); 367b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org data_in_buffer += new_chars_in_buffer; 368b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_offset_ += new_offset; 369b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org DCHECK(data_in_buffer <= kBufferSize); 370b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 371b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Did we use all the data in the data chunk? 372b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (current_data_offset_ == current_data_length_) { 373b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org delete[] current_data_; 374b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_ = NULL; 375b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_length_ = 0; 376b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org current_data_offset_ = 0; 377b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 378b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 379b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org return data_in_buffer; 380b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org} 381b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 382b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.orgvoid ExternalStreamingStream::HandleUtf8SplitCharacters( 383b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned* data_in_buffer) { 384b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // First check if we have leftover data from the last chunk. 385b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unibrow::uchar c; 386b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org if (utf8_split_char_buffer_length_ > 0) { 387b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Move the bytes which are part of the split character (which started in 388b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // the previous chunk) into utf8_split_char_buffer_. 389b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org while (current_data_offset_ < current_data_length_ && 390b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org utf8_split_char_buffer_length_ < 4 && 391b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org (c = current_data_[current_data_offset_]) > 392b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unibrow::Utf8::kMaxOneByteChar) { 393b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c; 394b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org ++utf8_split_char_buffer_length_; 395b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org ++current_data_offset_; 396b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 397b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 398b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Convert the data in utf8_split_char_buffer_. 399b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned new_offset = 0; 400b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org unsigned new_chars_in_buffer = 401b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org CopyCharsHelper(buffer_ + *data_in_buffer, 402b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org kBufferSize - *data_in_buffer, utf8_split_char_buffer_, 403b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org &new_offset, utf8_split_char_buffer_length_, encoding_); 404b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org *data_in_buffer += new_chars_in_buffer; 405b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Make sure we used all the data. 406b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org DCHECK(new_offset == utf8_split_char_buffer_length_); 407b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org DCHECK(*data_in_buffer <= kBufferSize); 408b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 409b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org utf8_split_char_buffer_length_ = 0; 410b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 411b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 412b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // Move bytes which are part of an incomplete character from the end of the 413b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org // current chunk to utf8_split_char_buffer_. They will be converted when the 4140b79655d2f038aa399f102f83e9ba6a6a05d3ea5dcarney@chromium.org // next data chunk arrives. Note that all valid UTF-8 characters are at most 4 4150b79655d2f038aa399f102f83e9ba6a6a05d3ea5dcarney@chromium.org // bytes long, but if the data is invalid, we can have character values bigger 4160b79655d2f038aa399f102f83e9ba6a6a05d3ea5dcarney@chromium.org // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes. 417b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org while (current_data_length_ > current_data_offset_ && 418b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org (c = current_data_[current_data_length_ - 1]) > 4190b79655d2f038aa399f102f83e9ba6a6a05d3ea5dcarney@chromium.org unibrow::Utf8::kMaxOneByteChar && 4200b79655d2f038aa399f102f83e9ba6a6a05d3ea5dcarney@chromium.org utf8_split_char_buffer_length_ < 4) { 421b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org --current_data_length_; 422b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org ++utf8_split_char_buffer_length_; 423b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 4240b79655d2f038aa399f102f83e9ba6a6a05d3ea5dcarney@chromium.org CHECK(utf8_split_char_buffer_length_ <= 4); 425b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) { 426b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i]; 427b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org } 428b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org} 429b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 430b376fed08cb9d90a3f67f655adf63c4b35feb106machenbach@chromium.org 43155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org// ---------------------------------------------------------------------------- 432154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org// ExternalTwoByteStringUtf16CharacterStream 43355ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 434154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgExternalTwoByteStringUtf16CharacterStream:: 435154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org ~ExternalTwoByteStringUtf16CharacterStream() { } 43655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 43755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 438154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.orgExternalTwoByteStringUtf16CharacterStream 439154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org ::ExternalTwoByteStringUtf16CharacterStream( 44055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org Handle<ExternalTwoByteString> data, 44155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org int start_position, 44255ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org int end_position) 443154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org : Utf16CharacterStream(), 44455ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org source_(data), 44555ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org raw_data_(data->GetTwoByteData(start_position)) { 44655ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_cursor_ = raw_data_, 44755ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org buffer_end_ = raw_data_ + (end_position - start_position); 44855ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org pos_ = start_position; 44955ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} 45055ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org 45155ee80713569ab0324fc8dcedcb5518501daa6a6ricow@chromium.org} } // namespace v8::internal 452