// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
#include "base/utf_offset_string_conversions.h"

#include <stddef.h>

#include <algorithm>

#include "base/scoped_ptr.h"
#include "base/string_piece.h"
#include "base/utf_string_conversion_utils.h"

using base::PrepareForUTF16Or32Output;
using base::ReadUnicodeCharacter;
using base::WriteUnicodeCharacter;
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Generalized Unicode converter -----------------------------------------------
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Converts the given source Unicode character type to the given destination
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Unicode character type as a STL string. The given input buffer and size
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// determine the source, and the given output STL string will be replaced by
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// the result.
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename SRC_CHAR>
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ConvertUnicode(const SRC_CHAR* src,
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    size_t src_len,
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    std::wstring* output,
27ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                    std::vector<size_t>* offsets_for_adjustment) {
28ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment) {
29ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::for_each(offsets_for_adjustment->begin(),
30ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  offsets_for_adjustment->end(),
31ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  LimitOffset<std::wstring>(src_len));
32ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // ICU requires 32-bit numbers.
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool success = true;
36ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  AdjustOffset::Adjustments adjustments;
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int32 src_len32 = static_cast<int32>(src_len);
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int32 i = 0; i < src_len32; i++) {
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    uint32 code_point;
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    size_t original_i = i;
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    size_t chars_written = 0;
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      chars_written = WriteUnicodeCharacter(code_point, output);
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else {
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      chars_written = WriteUnicodeCharacter(0xFFFD, output);
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      success = false;
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (offsets_for_adjustment) {
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // character read, not after it (so that incrementing it in the loop
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // increment will place it at the right location), so we need to account
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // for that in determining the amount that was read.
53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      adjustments.push_back(AdjustOffset::Adjustment(
54ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen          original_i, i - original_i + 1, chars_written));
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
58ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Make offset adjustment.
59ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment && !adjustments.empty()) {
60ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::for_each(offsets_for_adjustment->begin(),
61ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  offsets_for_adjustment->end(),
62ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  AdjustOffset(adjustments));
63ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
64ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return success;
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// UTF-8 <-> Wide --------------------------------------------------------------
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool UTF8ToWideAndAdjustOffset(const char* src,
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                               size_t src_len,
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                               std::wstring* output,
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                               size_t* offset_for_adjustment) {
74ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> offsets;
75ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
76ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    offsets.push_back(*offset_for_adjustment);
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  PrepareForUTF16Or32Output(src, src_len, output);
78ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  bool ret = ConvertUnicode(src, src_len, output, &offsets);
79ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
80ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    *offset_for_adjustment = offsets[0];
81ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return ret;
82ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
83ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
84ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool UTF8ToWideAndAdjustOffsets(const char* src,
85ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                size_t src_len,
86ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                std::wstring* output,
87ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                std::vector<size_t>* offsets_for_adjustment) {
88ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  PrepareForUTF16Or32Output(src, src_len, output);
89ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                       size_t* offset_for_adjustment) {
94ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> offsets;
95ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
96ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    offsets.push_back(*offset_for_adjustment);
97ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::wstring result;
98ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
99ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                             &offsets);
100ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
101ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    *offset_for_adjustment = offsets[0];
102ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return result;
103ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
104ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
105ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::wstring UTF8ToWideAndAdjustOffsets(const base::StringPiece& utf8,
106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                        std::vector<size_t>*
107ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                            offsets_for_adjustment) {
108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::wstring result;
109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result,
110ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                             offsets_for_adjustment);
111ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return result;
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// UTF-16 <-> Wide -------------------------------------------------------------
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#if defined(WCHAR_T_IS_UTF16)
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// When wide == UTF-16, then conversions are a NOP.
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool UTF16ToWideAndAdjustOffset(const char16* src,
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                size_t src_len,
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                std::wstring* output,
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                size_t* offset_for_adjustment) {
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  output->assign(src, src_len);
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *offset_for_adjustment = std::wstring::npos;
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return true;
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
129ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool UTF16ToWideAndAdjustOffsets(const char16* src,
130ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                 size_t src_len,
131ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                 std::wstring* output,
132ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                 std::vector<size_t>* offsets_for_adjustment) {
133ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  output->assign(src, src_len);
134ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment) {
135ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::for_each(offsets_for_adjustment->begin(),
136ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  offsets_for_adjustment->end(),
137ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  LimitOffset<std::wstring>(src_len));
138ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
139ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return true;
140ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
141ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                        size_t* offset_for_adjustment) {
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *offset_for_adjustment = std::wstring::npos;
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return utf16;
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
149ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::wstring UTF16ToWideAndAdjustOffsets(
150ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const string16& utf16,
151ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::vector<size_t>* offsets_for_adjustment) {
152ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment) {
153ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::for_each(offsets_for_adjustment->begin(),
154ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  offsets_for_adjustment->end(),
155ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  LimitOffset<std::wstring>(utf16.length()));
156ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
157ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return utf16;
158ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
159ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#elif defined(WCHAR_T_IS_UTF32)
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool UTF16ToWideAndAdjustOffset(const char16* src,
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                size_t src_len,
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                std::wstring* output,
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                size_t* offset_for_adjustment) {
166ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> offsets;
167ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
168ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    offsets.push_back(*offset_for_adjustment);
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  output->clear();
170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Assume that normally we won't have any non-BMP characters so the counts
171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // will be the same.
172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  output->reserve(src_len);
173ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  bool ret = ConvertUnicode(src, src_len, output, &offsets);
174ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
175ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    *offset_for_adjustment = offsets[0];
176ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return ret;
177ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
178ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
179ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool UTF16ToWideAndAdjustOffsets(const char16* src,
180ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                 size_t src_len,
181ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                 std::wstring* output,
182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                 std::vector<size_t>* offsets_for_adjustment) {
183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  output->clear();
184ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Assume that normally we won't have any non-BMP characters so the counts
185ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // will be the same.
186ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  output->reserve(src_len);
187ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                        size_t* offset_for_adjustment) {
192ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> offsets;
193ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
194ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    offsets.push_back(*offset_for_adjustment);
195ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::wstring result;
196ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result,
197ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                              &offsets);
198ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
199ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    *offset_for_adjustment = offsets[0];
200ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return result;
201ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
202ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
203ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::wstring UTF16ToWideAndAdjustOffsets(
204ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const string16& utf16,
205ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::vector<size_t>* offsets_for_adjustment) {
206ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::wstring result;
207ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result,
208ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                              offsets_for_adjustment);
209ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return result;
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // defined(WCHAR_T_IS_UTF32)
213ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
214ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenAdjustOffset::Adjustment::Adjustment(size_t location,
215ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                     size_t old_length,
216ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                     size_t new_length)
217ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  : location(location),
218ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    old_length(old_length),
219ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    new_length(new_length) {}
220ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
221ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenAdjustOffset::AdjustOffset(const Adjustments& adjustments)
222ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    : adjustments_(adjustments) {}
223ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
224ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid AdjustOffset::operator()(size_t& offset) {
225ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset == std::wstring::npos)
226ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return;
227ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  size_t adjustment = 0;
228ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (Adjustments::const_iterator i = adjustments_.begin();
229ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       i != adjustments_.end(); ++i) {
230ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    size_t location = i->location;
231ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (offset == location && i->new_length == 0) {
232ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      offset = std::wstring::npos;
233ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      return;
234ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
235ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (offset <= location)
236ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      break;
237ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (offset < (location + i->old_length)) {
238ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      offset = std::wstring::npos;
239ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      return;
240ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
241ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    adjustment += (i->old_length - i->new_length);
242ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
243ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  offset -= adjustment;
244ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
245