1958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier// Copyright 2014 the V8 project authors. All rights reserved. 2958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier// Use of this source code is governed by a BSD-style license that can be 3958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier// found in the LICENSE file. 4958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 5958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 6958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include "src/unicode-inl.h" 7958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include "src/unicode-decoder.h" 8958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include <stdio.h> 9958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include <stdlib.h> 10958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 11958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Berniernamespace unibrow { 12958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 13014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, 14014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch const uint8_t* stream, size_t stream_length) { 15958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Assume everything will fit in the buffer and stream won't be needed. 16958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier last_byte_of_buffer_unused_ = false; 17958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier unbuffered_start_ = NULL; 18014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unbuffered_length_ = 0; 19958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier bool writing_to_buffer = true; 20958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Loop until stream is read, writing to buffer as long as buffer has space. 21014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch size_t utf16_length = 0; 22958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier while (stream_length != 0) { 23014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch size_t cursor = 0; 24958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); 25958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier DCHECK(cursor > 0 && cursor <= stream_length); 26958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier stream += cursor; 27958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier stream_length -= cursor; 28958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; 29958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier utf16_length += is_two_characters ? 2 : 1; 30958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Don't need to write to the buffer, but still need utf16_length. 31958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier if (!writing_to_buffer) continue; 32958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Write out the characters to the buffer. 33958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Must check for equality with buffer_length as we've already updated it. 34958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier if (utf16_length <= buffer_length) { 35958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier if (is_two_characters) { 36958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier *buffer++ = Utf16::LeadSurrogate(character); 37958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier *buffer++ = Utf16::TrailSurrogate(character); 38958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } else { 39958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier *buffer++ = character; 40958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } 41958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier if (utf16_length == buffer_length) { 42958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Just wrote last character of buffer 43958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier writing_to_buffer = false; 44958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier unbuffered_start_ = stream; 45014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unbuffered_length_ = stream_length; 46958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } 47958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier continue; 48958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } 49958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Have gone over buffer. 50958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // Last char of buffer is unused, set cursor back. 51958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier DCHECK(is_two_characters); 52958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier writing_to_buffer = false; 53958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier last_byte_of_buffer_unused_ = true; 54958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier unbuffered_start_ = stream - cursor; 55014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unbuffered_length_ = stream_length + cursor; 56958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } 57958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier utf16_length_ = utf16_length; 58958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier} 59958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 60958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 61014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, 62014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch size_t stream_length, uint16_t* data, 63014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch size_t data_length) { 64958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier while (data_length != 0) { 65014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch size_t cursor = 0; 66014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); 67958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // There's a total lack of bounds checking for stream 68958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier // as it was already done in Reset. 69958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier stream += cursor; 70014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(stream_length >= cursor); 71014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch stream_length -= cursor; 72958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { 73958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier *data++ = Utf16::LeadSurrogate(character); 74958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier *data++ = Utf16::TrailSurrogate(character); 75958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier DCHECK(data_length > 1); 76958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier data_length -= 2; 77958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } else { 78958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier *data++ = character; 79958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier data_length -= 1; 80958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } 81958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } 82958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier} 83958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 84958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier} // namespace unibrow 85