15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2010 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/utf_string_conversions.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/string_piece.h" 8868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h" 92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/utf_string_conversion_utils.h" 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace base { 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Generalized Unicode converter ----------------------------------------------- 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Converts the given source Unicode character type to the given destination 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Unicode character type as a STL string. The given input buffer and size 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// determine the source, and the given output STL string will be replaced by 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the result. 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template<typename SRC_CHAR, typename DEST_STRING> 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ConvertUnicode(const SRC_CHAR* src, 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t src_len, 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEST_STRING* output) { 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // ICU requires 32-bit numbers. 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool success = true; 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 src_len32 = static_cast<int32>(src_len); 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int32 i = 0; i < src_len32; i++) { 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint32 code_point; 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) WriteUnicodeCharacter(code_point, output); 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) WriteUnicodeCharacter(0xFFFD, output); 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) success = false; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return success; 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UTF-8 <-> Wide -------------------------------------------------------------- 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PrepareForUTF8Output(src, src_len, output); 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ConvertUnicode(src, src_len, output); 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string WideToUTF8(const std::wstring& wide) { 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string ret; 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Ignore the success flag of this call, it will do the best it can for 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // invalid input, which is what we want here. 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) WideToUTF8(wide.data(), wide.length(), &ret); 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ret; 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PrepareForUTF16Or32Output(src, src_len, output); 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ConvertUnicode(src, src_len, output); 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)std::wstring UTF8ToWide(const StringPiece& utf8) { 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::wstring ret; 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UTF8ToWide(utf8.data(), utf8.length(), &ret); 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ret; 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UTF-16 <-> Wide ------------------------------------------------------------- 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(WCHAR_T_IS_UTF16) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// When wide == UTF-16, then conversions are a NOP. 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) output->assign(src, src_len); 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string16 WideToUTF16(const std::wstring& wide) { 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return wide; 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) output->assign(src, src_len); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::wstring UTF16ToWide(const string16& utf16) { 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return utf16; 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(WCHAR_T_IS_UTF32) 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) output->clear(); 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Assume that normally we won't have any non-BMP characters so the counts 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // will be the same. 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) output->reserve(src_len); 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ConvertUnicode(src, src_len, output); 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string16 WideToUTF16(const std::wstring& wide) { 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16 ret; 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) WideToUTF16(wide.data(), wide.length(), &ret); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ret; 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) output->clear(); 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Assume that normally we won't have any non-BMP characters so the counts 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // will be the same. 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) output->reserve(src_len); 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ConvertUnicode(src, src_len, output); 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::wstring UTF16ToWide(const string16& utf16) { 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::wstring ret; 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UTF16ToWide(utf16.data(), utf16.length(), &ret); 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ret; 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // defined(WCHAR_T_IS_UTF32) 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UTF16 <-> UTF8 -------------------------------------------------------------- 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(WCHAR_T_IS_UTF32) 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PrepareForUTF16Or32Output(src, src_len, output); 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ConvertUnicode(src, src_len, output); 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)string16 UTF8ToUTF16(const StringPiece& utf8) { 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16 ret; 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Ignore the success flag of this call, it will do the best it can for 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // invalid input, which is what we want here. 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UTF8ToUTF16(utf8.data(), utf8.length(), &ret); 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ret; 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PrepareForUTF8Output(src, src_len, output); 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ConvertUnicode(src, src_len, output); 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UTF16ToUTF8(const string16& utf16) { 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string ret; 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Ignore the success flag of this call, it will do the best it can for 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // invalid input, which is what we want here. 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ret; 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(WCHAR_T_IS_UTF16) 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Easy case since we can use the "wide" versions we already wrote above. 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return UTF8ToWide(src, src_len, output); 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)string16 UTF8ToUTF16(const StringPiece& utf8) { 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return UTF8ToWide(utf8); 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return WideToUTF8(src, src_len, output); 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UTF16ToUTF8(const string16& utf16) { 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return WideToUTF8(utf16); 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)std::wstring ASCIIToWide(const StringPiece& ascii) { 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(IsStringASCII(ascii)) << ascii; 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::wstring(ascii.begin(), ascii.end()); 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 180c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)string16 ASCIIToUTF16(const StringPiece& ascii) { 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(IsStringASCII(ascii)) << ascii; 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return string16(ascii.begin(), ascii.end()); 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 184c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 185a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)std::string UTF16ToASCII(const string16& utf16) { 186a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); 187a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return std::string(utf16.begin(), utf16.end()); 188a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 189a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 190c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} // namespace base 191