// utf_string_conversions.cc revision c7f5f8508d98d5952d42ed7648c2a8f30a4da156
1// Copyright (c) 2009 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/utf_string_conversions.h" 6 7#include "base/string_piece.h" 8#include "base/utf_string_conversion_utils.h" 9 10using base::PrepareForUTF8Output; 11using base::PrepareForUTF16Or32Output; 12using base::ReadUnicodeCharacter; 13using base::WriteUnicodeCharacter; 14 15namespace { 16 17// Generalized Unicode converter ----------------------------------------------- 18 19// Converts the given source Unicode character type to the given destination 20// Unicode character type as a STL string. The given input buffer and size 21// determine the source, and the given output STL string will be replaced by 22// the result. 23template<typename SRC_CHAR, typename DEST_STRING> 24bool ConvertUnicode(const SRC_CHAR* src, 25 size_t src_len, 26 DEST_STRING* output) { 27 // ICU requires 32-bit numbers. 28 bool success = true; 29 int32 src_len32 = static_cast<int32>(src_len); 30 for (int32 i = 0; i < src_len32; i++) { 31 uint32 code_point; 32 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 33 WriteUnicodeCharacter(code_point, output); 34 } else { 35 WriteUnicodeCharacter(0xFFFD, output); 36 success = false; 37 } 38 } 39 40 return success; 41} 42 43} // namespace 44 45// UTF-8 <-> Wide -------------------------------------------------------------- 46 47bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 48 PrepareForUTF8Output(src, src_len, output); 49 return ConvertUnicode(src, src_len, output); 50} 51 52std::string WideToUTF8(const std::wstring& wide) { 53 std::string ret; 54 // Ignore the success flag of this call, it will do the best it can for 55 // invalid input, which is what we want here. 
56 WideToUTF8(wide.data(), wide.length(), &ret); 57 return ret; 58} 59 60bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 61 PrepareForUTF16Or32Output(src, src_len, output); 62 return ConvertUnicode(src, src_len, output); 63} 64 65std::wstring UTF8ToWide(const base::StringPiece& utf8) { 66 std::wstring ret; 67 UTF8ToWide(utf8.data(), utf8.length(), &ret); 68 return ret; 69} 70 71// UTF-16 <-> Wide ------------------------------------------------------------- 72 73#if defined(WCHAR_T_IS_UTF16) 74 75// When wide == UTF-16, then conversions are a NOP. 76bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 77 output->assign(src, src_len); 78 return true; 79} 80 81string16 WideToUTF16(const std::wstring& wide) { 82 return wide; 83} 84 85bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 86 output->assign(src, src_len); 87 return true; 88} 89 90std::wstring UTF16ToWide(const string16& utf16) { 91 return utf16; 92} 93 94#elif defined(WCHAR_T_IS_UTF32) 95 96bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 97 output->clear(); 98 // Assume that normally we won't have any non-BMP characters so the counts 99 // will be the same. 100 output->reserve(src_len); 101 return ConvertUnicode(src, src_len, output); 102} 103 104string16 WideToUTF16(const std::wstring& wide) { 105 string16 ret; 106 WideToUTF16(wide.data(), wide.length(), &ret); 107 return ret; 108} 109 110bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 111 output->clear(); 112 // Assume that normally we won't have any non-BMP characters so the counts 113 // will be the same. 
114 output->reserve(src_len); 115 return ConvertUnicode(src, src_len, output); 116} 117 118std::wstring UTF16ToWide(const string16& utf16) { 119 std::wstring ret; 120 UTF16ToWide(utf16.data(), utf16.length(), &ret); 121 return ret; 122} 123 124#endif // defined(WCHAR_T_IS_UTF32) 125 126// UTF16 <-> UTF8 -------------------------------------------------------------- 127 128#if defined(WCHAR_T_IS_UTF32) 129 130bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 131 PrepareForUTF16Or32Output(src, src_len, output); 132 return ConvertUnicode(src, src_len, output); 133} 134 135string16 UTF8ToUTF16(const std::string& utf8) { 136 string16 ret; 137 // Ignore the success flag of this call, it will do the best it can for 138 // invalid input, which is what we want here. 139 UTF8ToUTF16(utf8.data(), utf8.length(), &ret); 140 return ret; 141} 142 143bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 144 PrepareForUTF8Output(src, src_len, output); 145 return ConvertUnicode(src, src_len, output); 146} 147 148std::string UTF16ToUTF8(const string16& utf16) { 149 std::string ret; 150 // Ignore the success flag of this call, it will do the best it can for 151 // invalid input, which is what we want here. 152 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 153 return ret; 154} 155 156#elif defined(WCHAR_T_IS_UTF16) 157// Easy case since we can use the "wide" versions we already wrote above. 158 159bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 160 return UTF8ToWide(src, src_len, output); 161} 162 163string16 UTF8ToUTF16(const std::string& utf8) { 164 return UTF8ToWide(utf8); 165} 166 167bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 168 return WideToUTF8(src, src_len, output); 169} 170 171std::string UTF16ToUTF8(const string16& utf16) { 172 return WideToUTF8(utf16); 173} 174 175#endif 176