1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/utf_string_conversions.h" 6 7#include "base/string_piece.h" 8#include "base/string_util.h" 9#include "base/utf_string_conversion_utils.h" 10 11using base::PrepareForUTF8Output; 12using base::PrepareForUTF16Or32Output; 13using base::ReadUnicodeCharacter; 14using base::WriteUnicodeCharacter; 15 16namespace { 17 18// Generalized Unicode converter ----------------------------------------------- 19 20// Converts the given source Unicode character type to the given destination 21// Unicode character type as a STL string. The given input buffer and size 22// determine the source, and the given output STL string will be replaced by 23// the result. 24template<typename SRC_CHAR, typename DEST_STRING> 25bool ConvertUnicode(const SRC_CHAR* src, 26 size_t src_len, 27 DEST_STRING* output) { 28 // ICU requires 32-bit numbers. 29 bool success = true; 30 int32 src_len32 = static_cast<int32>(src_len); 31 for (int32 i = 0; i < src_len32; i++) { 32 uint32 code_point; 33 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 34 WriteUnicodeCharacter(code_point, output); 35 } else { 36 WriteUnicodeCharacter(0xFFFD, output); 37 success = false; 38 } 39 } 40 41 return success; 42} 43 44} // namespace 45 46// UTF-8 <-> Wide -------------------------------------------------------------- 47 48bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 49 PrepareForUTF8Output(src, src_len, output); 50 return ConvertUnicode(src, src_len, output); 51} 52 53std::string WideToUTF8(const std::wstring& wide) { 54 std::string ret; 55 // Ignore the success flag of this call, it will do the best it can for 56 // invalid input, which is what we want here. 57 WideToUTF8(wide.data(), wide.length(), &ret); 58 return ret; 59} 60 61bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 62 PrepareForUTF16Or32Output(src, src_len, output); 63 return ConvertUnicode(src, src_len, output); 64} 65 66std::wstring UTF8ToWide(const base::StringPiece& utf8) { 67 std::wstring ret; 68 UTF8ToWide(utf8.data(), utf8.length(), &ret); 69 return ret; 70} 71 72// UTF-16 <-> Wide ------------------------------------------------------------- 73 74#if defined(WCHAR_T_IS_UTF16) 75 76// When wide == UTF-16, then conversions are a NOP. 77bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 78 output->assign(src, src_len); 79 return true; 80} 81 82string16 WideToUTF16(const std::wstring& wide) { 83 return wide; 84} 85 86bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 87 output->assign(src, src_len); 88 return true; 89} 90 91std::wstring UTF16ToWide(const string16& utf16) { 92 return utf16; 93} 94 95#elif defined(WCHAR_T_IS_UTF32) 96 97bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 98 output->clear(); 99 // Assume that normally we won't have any non-BMP characters so the counts 100 // will be the same. 101 output->reserve(src_len); 102 return ConvertUnicode(src, src_len, output); 103} 104 105string16 WideToUTF16(const std::wstring& wide) { 106 string16 ret; 107 WideToUTF16(wide.data(), wide.length(), &ret); 108 return ret; 109} 110 111bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 112 output->clear(); 113 // Assume that normally we won't have any non-BMP characters so the counts 114 // will be the same. 115 output->reserve(src_len); 116 return ConvertUnicode(src, src_len, output); 117} 118 119std::wstring UTF16ToWide(const string16& utf16) { 120 std::wstring ret; 121 UTF16ToWide(utf16.data(), utf16.length(), &ret); 122 return ret; 123} 124 125#endif // defined(WCHAR_T_IS_UTF32) 126 127// UTF16 <-> UTF8 -------------------------------------------------------------- 128 129#if defined(WCHAR_T_IS_UTF32) 130 131bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 132 PrepareForUTF16Or32Output(src, src_len, output); 133 return ConvertUnicode(src, src_len, output); 134} 135 136string16 UTF8ToUTF16(const base::StringPiece& utf8) { 137 string16 ret; 138 // Ignore the success flag of this call, it will do the best it can for 139 // invalid input, which is what we want here. 140 UTF8ToUTF16(utf8.data(), utf8.length(), &ret); 141 return ret; 142} 143 144bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 145 PrepareForUTF8Output(src, src_len, output); 146 return ConvertUnicode(src, src_len, output); 147} 148 149std::string UTF16ToUTF8(const string16& utf16) { 150 std::string ret; 151 // Ignore the success flag of this call, it will do the best it can for 152 // invalid input, which is what we want here. 153 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 154 return ret; 155} 156 157#elif defined(WCHAR_T_IS_UTF16) 158// Easy case since we can use the "wide" versions we already wrote above. 159 160bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 161 return UTF8ToWide(src, src_len, output); 162} 163 164string16 UTF8ToUTF16(const base::StringPiece& utf8) { 165 return UTF8ToWide(utf8); 166} 167 168bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 169 return WideToUTF8(src, src_len, output); 170} 171 172std::string UTF16ToUTF8(const string16& utf16) { 173 return WideToUTF8(utf16); 174} 175 176#endif 177 178std::wstring ASCIIToWide(const base::StringPiece& ascii) { 179 DCHECK(IsStringASCII(ascii)) << ascii; 180 return std::wstring(ascii.begin(), ascii.end()); 181} 182 183string16 ASCIIToUTF16(const base::StringPiece& ascii) { 184 DCHECK(IsStringASCII(ascii)) << ascii; 185 return string16(ascii.begin(), ascii.end()); 186} 187