1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/strings/utf_string_conversions.h" 6 7#include <stdint.h> 8 9#include "base/strings/string_piece.h" 10#include "base/strings/string_util.h" 11#include "base/strings/utf_string_conversion_utils.h" 12#include "build/build_config.h" 13 14namespace base { 15 16namespace { 17 18// Generalized Unicode converter ----------------------------------------------- 19 20// Converts the given source Unicode character type to the given destination 21// Unicode character type as a STL string. The given input buffer and size 22// determine the source, and the given output STL string will be replaced by 23// the result. 24template<typename SRC_CHAR, typename DEST_STRING> 25bool ConvertUnicode(const SRC_CHAR* src, 26 size_t src_len, 27 DEST_STRING* output) { 28 // ICU requires 32-bit numbers. 29 bool success = true; 30 int32_t src_len32 = static_cast<int32_t>(src_len); 31 for (int32_t i = 0; i < src_len32; i++) { 32 uint32_t code_point; 33 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 34 WriteUnicodeCharacter(code_point, output); 35 } else { 36 WriteUnicodeCharacter(0xFFFD, output); 37 success = false; 38 } 39 } 40 41 return success; 42} 43 44} // namespace 45 46// UTF-8 <-> Wide -------------------------------------------------------------- 47 48bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 49 if (IsStringASCII(std::wstring(src, src_len))) { 50 output->assign(src, src + src_len); 51 return true; 52 } else { 53 PrepareForUTF8Output(src, src_len, output); 54 return ConvertUnicode(src, src_len, output); 55 } 56} 57 58std::string WideToUTF8(const std::wstring& wide) { 59 if (IsStringASCII(wide)) { 60 return std::string(wide.data(), wide.data() + wide.length()); 61 } 62 63 std::string ret; 64 PrepareForUTF8Output(wide.data(), wide.length(), &ret); 65 ConvertUnicode(wide.data(), wide.length(), &ret); 66 return ret; 67} 68 69bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 70 if (IsStringASCII(StringPiece(src, src_len))) { 71 output->assign(src, src + src_len); 72 return true; 73 } else { 74 PrepareForUTF16Or32Output(src, src_len, output); 75 return ConvertUnicode(src, src_len, output); 76 } 77} 78 79std::wstring UTF8ToWide(StringPiece utf8) { 80 if (IsStringASCII(utf8)) { 81 return std::wstring(utf8.begin(), utf8.end()); 82 } 83 84 std::wstring ret; 85 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); 86 ConvertUnicode(utf8.data(), utf8.length(), &ret); 87 return ret; 88} 89 90// UTF-16 <-> Wide ------------------------------------------------------------- 91 92#if defined(WCHAR_T_IS_UTF16) 93 94// When wide == UTF-16, then conversions are a NOP. 95bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 96 output->assign(src, src_len); 97 return true; 98} 99 100string16 WideToUTF16(const std::wstring& wide) { 101 return wide; 102} 103 104bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 105 output->assign(src, src_len); 106 return true; 107} 108 109std::wstring UTF16ToWide(const string16& utf16) { 110 return utf16; 111} 112 113#elif defined(WCHAR_T_IS_UTF32) 114 115bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 116 output->clear(); 117 // Assume that normally we won't have any non-BMP characters so the counts 118 // will be the same. 119 output->reserve(src_len); 120 return ConvertUnicode(src, src_len, output); 121} 122 123string16 WideToUTF16(const std::wstring& wide) { 124 string16 ret; 125 WideToUTF16(wide.data(), wide.length(), &ret); 126 return ret; 127} 128 129bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 130 output->clear(); 131 // Assume that normally we won't have any non-BMP characters so the counts 132 // will be the same. 133 output->reserve(src_len); 134 return ConvertUnicode(src, src_len, output); 135} 136 137std::wstring UTF16ToWide(const string16& utf16) { 138 std::wstring ret; 139 UTF16ToWide(utf16.data(), utf16.length(), &ret); 140 return ret; 141} 142 143#endif // defined(WCHAR_T_IS_UTF32) 144 145// UTF16 <-> UTF8 -------------------------------------------------------------- 146 147#if defined(WCHAR_T_IS_UTF32) 148 149bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 150 if (IsStringASCII(StringPiece(src, src_len))) { 151 output->assign(src, src + src_len); 152 return true; 153 } else { 154 PrepareForUTF16Or32Output(src, src_len, output); 155 return ConvertUnicode(src, src_len, output); 156 } 157} 158 159string16 UTF8ToUTF16(StringPiece utf8) { 160 if (IsStringASCII(utf8)) { 161 return string16(utf8.begin(), utf8.end()); 162 } 163 164 string16 ret; 165 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); 166 // Ignore the success flag of this call, it will do the best it can for 167 // invalid input, which is what we want here. 168 ConvertUnicode(utf8.data(), utf8.length(), &ret); 169 return ret; 170} 171 172bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 173 if (IsStringASCII(StringPiece16(src, src_len))) { 174 output->assign(src, src + src_len); 175 return true; 176 } else { 177 PrepareForUTF8Output(src, src_len, output); 178 return ConvertUnicode(src, src_len, output); 179 } 180} 181 182std::string UTF16ToUTF8(StringPiece16 utf16) { 183 std::string ret; 184 // Ignore the success flag of this call, it will do the best it can for 185 // invalid input, which is what we want here. 186 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 187 return ret; 188} 189 190#elif defined(WCHAR_T_IS_UTF16) 191// Easy case since we can use the "wide" versions we already wrote above. 192 193bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 194 return UTF8ToWide(src, src_len, output); 195} 196 197string16 UTF8ToUTF16(StringPiece utf8) { 198 return UTF8ToWide(utf8); 199} 200 201bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 202 return WideToUTF8(src, src_len, output); 203} 204 205std::string UTF16ToUTF8(StringPiece16 utf16) { 206 if (IsStringASCII(utf16)) 207 return std::string(utf16.data(), utf16.data() + utf16.length()); 208 209 std::string ret; 210 PrepareForUTF8Output(utf16.data(), utf16.length(), &ret); 211 ConvertUnicode(utf16.data(), utf16.length(), &ret); 212 return ret; 213} 214 215#endif 216 217string16 ASCIIToUTF16(StringPiece ascii) { 218 DCHECK(IsStringASCII(ascii)) << ascii; 219 return string16(ascii.begin(), ascii.end()); 220} 221 222std::string UTF16ToASCII(StringPiece16 utf16) { 223 DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); 224 return std::string(utf16.begin(), utf16.end()); 225} 226 227} // namespace base 228