utf_string_conversions.cc revision c7f5f8508d98d5952d42ed7648c2a8f30a4da156
1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/utf_string_conversions.h"
6
7#include "base/string_piece.h"
8#include "base/utf_string_conversion_utils.h"
9
10using base::PrepareForUTF8Output;
11using base::PrepareForUTF16Or32Output;
12using base::ReadUnicodeCharacter;
13using base::WriteUnicodeCharacter;
14
15namespace {
16
17// Generalized Unicode converter -----------------------------------------------
18
19// Converts the given source Unicode character type to the given destination
20// Unicode character type as a STL string. The given input buffer and size
21// determine the source, and the given output STL string will be replaced by
22// the result.
23template<typename SRC_CHAR, typename DEST_STRING>
24bool ConvertUnicode(const SRC_CHAR* src,
25                    size_t src_len,
26                    DEST_STRING* output) {
27  // ICU requires 32-bit numbers.
28  bool success = true;
29  int32 src_len32 = static_cast<int32>(src_len);
30  for (int32 i = 0; i < src_len32; i++) {
31    uint32 code_point;
32    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
33      WriteUnicodeCharacter(code_point, output);
34    } else {
35      WriteUnicodeCharacter(0xFFFD, output);
36      success = false;
37    }
38  }
39
40  return success;
41}
42
43}  // namespace
44
45// UTF-8 <-> Wide --------------------------------------------------------------
46
47bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
48  PrepareForUTF8Output(src, src_len, output);
49  return ConvertUnicode(src, src_len, output);
50}
51
52std::string WideToUTF8(const std::wstring& wide) {
53  std::string ret;
54  // Ignore the success flag of this call, it will do the best it can for
55  // invalid input, which is what we want here.
56  WideToUTF8(wide.data(), wide.length(), &ret);
57  return ret;
58}
59
60bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
61  PrepareForUTF16Or32Output(src, src_len, output);
62  return ConvertUnicode(src, src_len, output);
63}
64
65std::wstring UTF8ToWide(const base::StringPiece& utf8) {
66  std::wstring ret;
67  UTF8ToWide(utf8.data(), utf8.length(), &ret);
68  return ret;
69}
70
71// UTF-16 <-> Wide -------------------------------------------------------------
72
73#if defined(WCHAR_T_IS_UTF16)
74
75// When wide == UTF-16, then conversions are a NOP.
76bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
77  output->assign(src, src_len);
78  return true;
79}
80
81string16 WideToUTF16(const std::wstring& wide) {
82  return wide;
83}
84
85bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
86  output->assign(src, src_len);
87  return true;
88}
89
90std::wstring UTF16ToWide(const string16& utf16) {
91  return utf16;
92}
93
94#elif defined(WCHAR_T_IS_UTF32)
95
96bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
97  output->clear();
98  // Assume that normally we won't have any non-BMP characters so the counts
99  // will be the same.
100  output->reserve(src_len);
101  return ConvertUnicode(src, src_len, output);
102}
103
104string16 WideToUTF16(const std::wstring& wide) {
105  string16 ret;
106  WideToUTF16(wide.data(), wide.length(), &ret);
107  return ret;
108}
109
110bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
111  output->clear();
112  // Assume that normally we won't have any non-BMP characters so the counts
113  // will be the same.
114  output->reserve(src_len);
115  return ConvertUnicode(src, src_len, output);
116}
117
118std::wstring UTF16ToWide(const string16& utf16) {
119  std::wstring ret;
120  UTF16ToWide(utf16.data(), utf16.length(), &ret);
121  return ret;
122}
123
124#endif  // defined(WCHAR_T_IS_UTF32)
125
126// UTF16 <-> UTF8 --------------------------------------------------------------
127
128#if defined(WCHAR_T_IS_UTF32)
129
130bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
131  PrepareForUTF16Or32Output(src, src_len, output);
132  return ConvertUnicode(src, src_len, output);
133}
134
135string16 UTF8ToUTF16(const std::string& utf8) {
136  string16 ret;
137  // Ignore the success flag of this call, it will do the best it can for
138  // invalid input, which is what we want here.
139  UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
140  return ret;
141}
142
143bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
144  PrepareForUTF8Output(src, src_len, output);
145  return ConvertUnicode(src, src_len, output);
146}
147
148std::string UTF16ToUTF8(const string16& utf16) {
149  std::string ret;
150  // Ignore the success flag of this call, it will do the best it can for
151  // invalid input, which is what we want here.
152  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
153  return ret;
154}
155
156#elif defined(WCHAR_T_IS_UTF16)
157// Easy case since we can use the "wide" versions we already wrote above.
158
159bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
160  return UTF8ToWide(src, src_len, output);
161}
162
163string16 UTF8ToUTF16(const std::string& utf8) {
164  return UTF8ToWide(utf8);
165}
166
167bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
168  return WideToUTF8(src, src_len, output);
169}
170
171std::string UTF16ToUTF8(const string16& utf16) {
172  return WideToUTF8(utf16);
173}
174
175#endif
176