// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier
5958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier
6958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include "src/unicode-inl.h"
7958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include "src/unicode-decoder.h"
8958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include <stdio.h>
9958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include <stdlib.h>
10958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier
11958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Berniernamespace unibrow {
12958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier
13014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
14014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch                            const uint8_t* stream, size_t stream_length) {
15958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  // Assume everything will fit in the buffer and stream won't be needed.
16958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  last_byte_of_buffer_unused_ = false;
17958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  unbuffered_start_ = NULL;
18014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  unbuffered_length_ = 0;
19958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  bool writing_to_buffer = true;
20958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  // Loop until stream is read, writing to buffer as long as buffer has space.
21014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  size_t utf16_length = 0;
22958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  while (stream_length != 0) {
23014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    size_t cursor = 0;
24958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
25958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    DCHECK(cursor > 0 && cursor <= stream_length);
26958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    stream += cursor;
27958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    stream_length -= cursor;
28958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
29958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    utf16_length += is_two_characters ? 2 : 1;
30958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    // Don't need to write to the buffer, but still need utf16_length.
31958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    if (!writing_to_buffer) continue;
32958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    // Write out the characters to the buffer.
33958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    // Must check for equality with buffer_length as we've already updated it.
34958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    if (utf16_length <= buffer_length) {
35958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      if (is_two_characters) {
36958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier        *buffer++ = Utf16::LeadSurrogate(character);
37958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier        *buffer++ = Utf16::TrailSurrogate(character);
38958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      } else {
39958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier        *buffer++ = character;
40958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      }
41958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      if (utf16_length == buffer_length) {
42958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier        // Just wrote last character of buffer
43958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier        writing_to_buffer = false;
44958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier        unbuffered_start_ = stream;
45014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch        unbuffered_length_ = stream_length;
46958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      }
47958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      continue;
48958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    }
49958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    // Have gone over buffer.
50958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    // Last char of buffer is unused, set cursor back.
51958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    DCHECK(is_two_characters);
52958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    writing_to_buffer = false;
53958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    last_byte_of_buffer_unused_ = true;
54958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    unbuffered_start_ = stream - cursor;
55014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    unbuffered_length_ = stream_length + cursor;
56958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  }
57958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  utf16_length_ = utf16_length;
58958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier}
59958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier
60958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier
61014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream,
62014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch                                     size_t stream_length, uint16_t* data,
63014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch                                     size_t data_length) {
64958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  while (data_length != 0) {
65014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    size_t cursor = 0;
66014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
67958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    // There's a total lack of bounds checking for stream
68958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    // as it was already done in Reset.
69958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    stream += cursor;
70014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    DCHECK(stream_length >= cursor);
71014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    stream_length -= cursor;
72958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
73958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      *data++ = Utf16::LeadSurrogate(character);
74958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      *data++ = Utf16::TrailSurrogate(character);
75958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      DCHECK(data_length > 1);
76958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      data_length -= 2;
77958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    } else {
78958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      *data++ = character;
79958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier      data_length -= 1;
80958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier    }
81958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier  }
82958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier}
83958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier
84958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier}  // namespace unibrow
85