utf_string_conversions.cc revision cce46a0c214b37e8da48c522c83037e8ffa4f9fd
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/utf_string_conversions.h"
6
7#include <stdint.h>
8
9#include "base/strings/string_piece.h"
10#include "base/strings/string_util.h"
11#include "base/strings/utf_string_conversion_utils.h"
12#include "build/build_config.h"
13
14namespace base {
15
16namespace {
17
18// Generalized Unicode converter -----------------------------------------------
19
// Converts |src_len| code units of type SRC_CHAR starting at |src| into the
// Unicode encoding implied by DEST_STRING, emitting the result into |output|.
//
// Each code point is decoded with ReadUnicodeCharacter() and written with
// WriteUnicodeCharacter(). A sequence that fails to decode is emitted as
// U+FFFD (REPLACEMENT CHARACTER) and the conversion carries on with the rest
// of the input.
//
// This function does not reset |output| itself; the callers in this file
// clear or prepare it before calling.
//
// Returns true only if every input sequence decoded cleanly. Returns false if
// any sequence had to be replaced, or if |src_len| cannot be represented as an
// int32_t (in which case nothing is written).
template<typename SRC_CHAR, typename DEST_STRING>
bool ConvertUnicode(const SRC_CHAR* src,
                    size_t src_len,
                    DEST_STRING* output) {
  // ICU requires 32-bit numbers. Reject lengths that do not fit rather than
  // letting the narrowing cast below wrap around, which would silently skip or
  // truncate the input while still reporting success.
  if (src_len > static_cast<size_t>(INT32_MAX))
    return false;

  bool success = true;
  const int32_t src_len32 = static_cast<int32_t>(src_len);
  // ReadUnicodeCharacter() receives |&i| and may move it forward past the
  // extra code units of a multi-unit character; the loop increment then steps
  // to the start of the next character.
  for (int32_t i = 0; i < src_len32; i++) {
    uint32_t code_point;
    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
      WriteUnicodeCharacter(code_point, output);
    } else {
      // Undecodable input: substitute U+FFFD and remember the failure.
      WriteUnicodeCharacter(0xFFFD, output);
      success = false;
    }
  }

  return success;
}
43
44}  // namespace
45
46// UTF-8 <-> Wide --------------------------------------------------------------
47
48bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
49  if (IsStringASCII(std::wstring(src, src_len))) {
50    output->assign(src, src + src_len);
51    return true;
52  } else {
53    PrepareForUTF8Output(src, src_len, output);
54    return ConvertUnicode(src, src_len, output);
55  }
56}
57
58std::string WideToUTF8(const std::wstring& wide) {
59  if (IsStringASCII(wide)) {
60    return std::string(wide.data(), wide.data() + wide.length());
61  }
62
63  std::string ret;
64  PrepareForUTF8Output(wide.data(), wide.length(), &ret);
65  ConvertUnicode(wide.data(), wide.length(), &ret);
66  return ret;
67}
68
69bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
70  if (IsStringASCII(StringPiece(src, src_len))) {
71    output->assign(src, src + src_len);
72    return true;
73  } else {
74    PrepareForUTF16Or32Output(src, src_len, output);
75    return ConvertUnicode(src, src_len, output);
76  }
77}
78
79std::wstring UTF8ToWide(StringPiece utf8) {
80  if (IsStringASCII(utf8)) {
81    return std::wstring(utf8.begin(), utf8.end());
82  }
83
84  std::wstring ret;
85  PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
86  ConvertUnicode(utf8.data(), utf8.length(), &ret);
87  return ret;
88}
89
90// UTF-16 <-> Wide -------------------------------------------------------------
91
92#if defined(WCHAR_T_IS_UTF16)
93
94// When wide == UTF-16, then conversions are a NOP.
95bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
96  output->assign(src, src_len);
97  return true;
98}
99
100string16 WideToUTF16(const std::wstring& wide) {
101  return wide;
102}
103
104bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
105  output->assign(src, src_len);
106  return true;
107}
108
109std::wstring UTF16ToWide(const string16& utf16) {
110  return utf16;
111}
112
113#elif defined(WCHAR_T_IS_UTF32)
114
115bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
116  output->clear();
117  // Assume that normally we won't have any non-BMP characters so the counts
118  // will be the same.
119  output->reserve(src_len);
120  return ConvertUnicode(src, src_len, output);
121}
122
123string16 WideToUTF16(const std::wstring& wide) {
124  string16 ret;
125  WideToUTF16(wide.data(), wide.length(), &ret);
126  return ret;
127}
128
129bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
130  output->clear();
131  // Assume that normally we won't have any non-BMP characters so the counts
132  // will be the same.
133  output->reserve(src_len);
134  return ConvertUnicode(src, src_len, output);
135}
136
137std::wstring UTF16ToWide(const string16& utf16) {
138  std::wstring ret;
139  UTF16ToWide(utf16.data(), utf16.length(), &ret);
140  return ret;
141}
142
143#endif  // defined(WCHAR_T_IS_UTF32)
144
145// UTF16 <-> UTF8 --------------------------------------------------------------
146
147#if defined(WCHAR_T_IS_UTF32)
148
149bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
150  if (IsStringASCII(StringPiece(src, src_len))) {
151    output->assign(src, src + src_len);
152    return true;
153  } else {
154    PrepareForUTF16Or32Output(src, src_len, output);
155    return ConvertUnicode(src, src_len, output);
156  }
157}
158
159string16 UTF8ToUTF16(StringPiece utf8) {
160  if (IsStringASCII(utf8)) {
161    return string16(utf8.begin(), utf8.end());
162  }
163
164  string16 ret;
165  PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
166  // Ignore the success flag of this call, it will do the best it can for
167  // invalid input, which is what we want here.
168  ConvertUnicode(utf8.data(), utf8.length(), &ret);
169  return ret;
170}
171
172bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
173  if (IsStringASCII(StringPiece16(src, src_len))) {
174    output->assign(src, src + src_len);
175    return true;
176  } else {
177    PrepareForUTF8Output(src, src_len, output);
178    return ConvertUnicode(src, src_len, output);
179  }
180}
181
182std::string UTF16ToUTF8(StringPiece16 utf16) {
183  if (IsStringASCII(utf16)) {
184    return std::string(utf16.begin(), utf16.end());
185  }
186
187  std::string ret;
188  // Ignore the success flag of this call, it will do the best it can for
189  // invalid input, which is what we want here.
190  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
191  return ret;
192}
193
194#elif defined(WCHAR_T_IS_UTF16)
195// Easy case since we can use the "wide" versions we already wrote above.
196
197bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
198  return UTF8ToWide(src, src_len, output);
199}
200
201string16 UTF8ToUTF16(StringPiece utf8) {
202  return UTF8ToWide(utf8);
203}
204
205bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
206  return WideToUTF8(src, src_len, output);
207}
208
209std::string UTF16ToUTF8(StringPiece16 utf16) {
210  if (IsStringASCII(utf16))
211    return std::string(utf16.data(), utf16.data() + utf16.length());
212
213  std::string ret;
214  PrepareForUTF8Output(utf16.data(), utf16.length(), &ret);
215  ConvertUnicode(utf16.data(), utf16.length(), &ret);
216  return ret;
217}
218
219#endif
220
221string16 ASCIIToUTF16(StringPiece ascii) {
222  DCHECK(IsStringASCII(ascii)) << ascii;
223  return string16(ascii.begin(), ascii.end());
224}
225
226std::string UTF16ToASCII(StringPiece16 utf16) {
227  DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
228  return std::string(utf16.begin(), utf16.end());
229}
230
231}  // namespace base
232