1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/utf_string_conversions.h"
6
7#include "base/strings/string_piece.h"
8#include "base/strings/string_util.h"
9#include "base/strings/utf_string_conversion_utils.h"
10
11namespace base {
12
13namespace {
14
15// Generalized Unicode converter -----------------------------------------------
16
17// Converts the given source Unicode character type to the given destination
18// Unicode character type as a STL string. The given input buffer and size
19// determine the source, and the given output STL string will be replaced by
20// the result.
21template<typename SRC_CHAR, typename DEST_STRING>
22bool ConvertUnicode(const SRC_CHAR* src,
23                    size_t src_len,
24                    DEST_STRING* output) {
25  // ICU requires 32-bit numbers.
26  bool success = true;
27  int32 src_len32 = static_cast<int32>(src_len);
28  for (int32 i = 0; i < src_len32; i++) {
29    uint32 code_point;
30    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
31      WriteUnicodeCharacter(code_point, output);
32    } else {
33      WriteUnicodeCharacter(0xFFFD, output);
34      success = false;
35    }
36  }
37
38  return success;
39}
40
41}  // namespace
42
43// UTF-8 <-> Wide --------------------------------------------------------------
44
45bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
46  PrepareForUTF8Output(src, src_len, output);
47  return ConvertUnicode(src, src_len, output);
48}
49
50std::string WideToUTF8(const std::wstring& wide) {
51  std::string ret;
52  // Ignore the success flag of this call, it will do the best it can for
53  // invalid input, which is what we want here.
54  WideToUTF8(wide.data(), wide.length(), &ret);
55  return ret;
56}
57
58bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
59  PrepareForUTF16Or32Output(src, src_len, output);
60  return ConvertUnicode(src, src_len, output);
61}
62
63std::wstring UTF8ToWide(const StringPiece& utf8) {
64  std::wstring ret;
65  UTF8ToWide(utf8.data(), utf8.length(), &ret);
66  return ret;
67}
68
69// UTF-16 <-> Wide -------------------------------------------------------------
70
71#if defined(WCHAR_T_IS_UTF16)
72
73// When wide == UTF-16, then conversions are a NOP.
74bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
75  output->assign(src, src_len);
76  return true;
77}
78
79string16 WideToUTF16(const std::wstring& wide) {
80  return wide;
81}
82
83bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
84  output->assign(src, src_len);
85  return true;
86}
87
88std::wstring UTF16ToWide(const string16& utf16) {
89  return utf16;
90}
91
92#elif defined(WCHAR_T_IS_UTF32)
93
94bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
95  output->clear();
96  // Assume that normally we won't have any non-BMP characters so the counts
97  // will be the same.
98  output->reserve(src_len);
99  return ConvertUnicode(src, src_len, output);
100}
101
102string16 WideToUTF16(const std::wstring& wide) {
103  string16 ret;
104  WideToUTF16(wide.data(), wide.length(), &ret);
105  return ret;
106}
107
108bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
109  output->clear();
110  // Assume that normally we won't have any non-BMP characters so the counts
111  // will be the same.
112  output->reserve(src_len);
113  return ConvertUnicode(src, src_len, output);
114}
115
116std::wstring UTF16ToWide(const string16& utf16) {
117  std::wstring ret;
118  UTF16ToWide(utf16.data(), utf16.length(), &ret);
119  return ret;
120}
121
122#endif  // defined(WCHAR_T_IS_UTF32)
123
124// UTF16 <-> UTF8 --------------------------------------------------------------
125
126#if defined(WCHAR_T_IS_UTF32)
127
128bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
129  PrepareForUTF16Or32Output(src, src_len, output);
130  return ConvertUnicode(src, src_len, output);
131}
132
133string16 UTF8ToUTF16(const StringPiece& utf8) {
134  string16 ret;
135  // Ignore the success flag of this call, it will do the best it can for
136  // invalid input, which is what we want here.
137  UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
138  return ret;
139}
140
141bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
142  PrepareForUTF8Output(src, src_len, output);
143  return ConvertUnicode(src, src_len, output);
144}
145
146std::string UTF16ToUTF8(const string16& utf16) {
147  std::string ret;
148  // Ignore the success flag of this call, it will do the best it can for
149  // invalid input, which is what we want here.
150  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
151  return ret;
152}
153
154#elif defined(WCHAR_T_IS_UTF16)
155// Easy case since we can use the "wide" versions we already wrote above.
156
157bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
158  return UTF8ToWide(src, src_len, output);
159}
160
161string16 UTF8ToUTF16(const StringPiece& utf8) {
162  return UTF8ToWide(utf8);
163}
164
165bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
166  return WideToUTF8(src, src_len, output);
167}
168
169std::string UTF16ToUTF8(const string16& utf16) {
170  return WideToUTF8(utf16);
171}
172
173#endif
174
175std::wstring ASCIIToWide(const StringPiece& ascii) {
176  DCHECK(IsStringASCII(ascii)) << ascii;
177  return std::wstring(ascii.begin(), ascii.end());
178}
179
180string16 ASCIIToUTF16(const StringPiece& ascii) {
181  DCHECK(IsStringASCII(ascii)) << ascii;
182  return string16(ascii.begin(), ascii.end());
183}
184
185std::string UTF16ToASCII(const string16& utf16) {
186  DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
187  return std::string(utf16.begin(), utf16.end());
188}
189
190}  // namespace base
191