1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/utf_string_conversions.h"
6
7#include "base/string_piece.h"
8#include "base/string_util.h"
9#include "base/utf_string_conversion_utils.h"
10
11using base::PrepareForUTF8Output;
12using base::PrepareForUTF16Or32Output;
13using base::ReadUnicodeCharacter;
14using base::WriteUnicodeCharacter;
15
16namespace {
17
18// Generalized Unicode converter -----------------------------------------------
19
20// Converts the given source Unicode character type to the given destination
21// Unicode character type as a STL string. The given input buffer and size
22// determine the source, and the given output STL string will be replaced by
23// the result.
24template<typename SRC_CHAR, typename DEST_STRING>
25bool ConvertUnicode(const SRC_CHAR* src,
26                    size_t src_len,
27                    DEST_STRING* output) {
28  // ICU requires 32-bit numbers.
29  bool success = true;
30  int32 src_len32 = static_cast<int32>(src_len);
31  for (int32 i = 0; i < src_len32; i++) {
32    uint32 code_point;
33    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
34      WriteUnicodeCharacter(code_point, output);
35    } else {
36      WriteUnicodeCharacter(0xFFFD, output);
37      success = false;
38    }
39  }
40
41  return success;
42}
43
44}  // namespace
45
46// UTF-8 <-> Wide --------------------------------------------------------------
47
48bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
49  PrepareForUTF8Output(src, src_len, output);
50  return ConvertUnicode(src, src_len, output);
51}
52
53std::string WideToUTF8(const std::wstring& wide) {
54  std::string ret;
55  // Ignore the success flag of this call, it will do the best it can for
56  // invalid input, which is what we want here.
57  WideToUTF8(wide.data(), wide.length(), &ret);
58  return ret;
59}
60
61bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
62  PrepareForUTF16Or32Output(src, src_len, output);
63  return ConvertUnicode(src, src_len, output);
64}
65
66std::wstring UTF8ToWide(const base::StringPiece& utf8) {
67  std::wstring ret;
68  UTF8ToWide(utf8.data(), utf8.length(), &ret);
69  return ret;
70}
71
72// UTF-16 <-> Wide -------------------------------------------------------------
73
74#if defined(WCHAR_T_IS_UTF16)
75
// When wchar_t is UTF-16, wide strings and UTF-16 strings have the same
// representation, so these "conversions" are plain copies.
77bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
78  output->assign(src, src_len);
79  return true;
80}
81
82string16 WideToUTF16(const std::wstring& wide) {
83  return wide;
84}
85
86bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
87  output->assign(src, src_len);
88  return true;
89}
90
91std::wstring UTF16ToWide(const string16& utf16) {
92  return utf16;
93}
94
95#elif defined(WCHAR_T_IS_UTF32)
96
97bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
98  output->clear();
99  // Assume that normally we won't have any non-BMP characters so the counts
100  // will be the same.
101  output->reserve(src_len);
102  return ConvertUnicode(src, src_len, output);
103}
104
105string16 WideToUTF16(const std::wstring& wide) {
106  string16 ret;
107  WideToUTF16(wide.data(), wide.length(), &ret);
108  return ret;
109}
110
111bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
112  output->clear();
113  // Assume that normally we won't have any non-BMP characters so the counts
114  // will be the same.
115  output->reserve(src_len);
116  return ConvertUnicode(src, src_len, output);
117}
118
119std::wstring UTF16ToWide(const string16& utf16) {
120  std::wstring ret;
121  UTF16ToWide(utf16.data(), utf16.length(), &ret);
122  return ret;
123}
124
125#endif  // defined(WCHAR_T_IS_UTF32)
126
127// UTF16 <-> UTF8 --------------------------------------------------------------
128
129#if defined(WCHAR_T_IS_UTF32)
130
131bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
132  PrepareForUTF16Or32Output(src, src_len, output);
133  return ConvertUnicode(src, src_len, output);
134}
135
136string16 UTF8ToUTF16(const base::StringPiece& utf8) {
137  string16 ret;
138  // Ignore the success flag of this call, it will do the best it can for
139  // invalid input, which is what we want here.
140  UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
141  return ret;
142}
143
144bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
145  PrepareForUTF8Output(src, src_len, output);
146  return ConvertUnicode(src, src_len, output);
147}
148
149std::string UTF16ToUTF8(const string16& utf16) {
150  std::string ret;
151  // Ignore the success flag of this call, it will do the best it can for
152  // invalid input, which is what we want here.
153  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
154  return ret;
155}
156
157#elif defined(WCHAR_T_IS_UTF16)
158// Easy case since we can use the "wide" versions we already wrote above.
159
160bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
161  return UTF8ToWide(src, src_len, output);
162}
163
164string16 UTF8ToUTF16(const base::StringPiece& utf8) {
165  return UTF8ToWide(utf8);
166}
167
168bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
169  return WideToUTF8(src, src_len, output);
170}
171
172std::string UTF16ToUTF8(const string16& utf16) {
173  return WideToUTF8(utf16);
174}
175
176#endif
177
178std::wstring ASCIIToWide(const base::StringPiece& ascii) {
179  DCHECK(IsStringASCII(ascii)) << ascii;
180  return std::wstring(ascii.begin(), ascii.end());
181}
182
183string16 ASCIIToUTF16(const base::StringPiece& ascii) {
184  DCHECK(IsStringASCII(ascii)) << ascii;
185  return string16(ascii.begin(), ascii.end());
186}
187