// sys_string_conversions_unittest.cc revision c7f5f8508d98d5952d42ed7648c2a8f30a4da156
1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include <locale.h> 6 7#include <string> 8 9#include "base/basictypes.h" 10#include "base/string_piece.h" 11#include "base/string_util.h" 12#include "base/sys_string_conversions.h" 13#include "testing/gtest/include/gtest/gtest.h" 14 15#ifdef WCHAR_T_IS_UTF32 16static const std::wstring kSysWideOldItalicLetterA = L"\x10300"; 17#else 18static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00"; 19#endif 20 21TEST(SysStrings, SysWideToUTF8) { 22 using base::SysWideToUTF8; 23 EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); 24 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); 25 26 // >16 bits 27 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA)); 28 29 // Error case. When Windows finds a UTF-16 character going off the end of 30 // a string, it just converts that literal value to UTF-8, even though this 31 // is invalid. 32 // 33 // This is what XP does, but Vista has different behavior, so we don't bother 34 // verifying it: 35 //EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", 36 // SysWideToUTF8(L"\x4f60\xd800zyxw")); 37 38 // Test embedded NULLs. 39 std::wstring wide_null(L"a"); 40 wide_null.push_back(0); 41 wide_null.push_back('b'); 42 43 std::string expected_null("a"); 44 expected_null.push_back(0); 45 expected_null.push_back('b'); 46 47 EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); 48} 49 50TEST(SysStrings, SysUTF8ToWide) { 51 using base::SysUTF8ToWide; 52 EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); 53 EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); 54 // >16 bits 55 EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80")); 56 57 // Error case. When Windows finds an invalid UTF-8 character, it just skips 58 // it. 
This seems weird because it's inconsistent with the reverse conversion. 59 // 60 // This is what XP does, but Vista has different behavior, so we don't bother 61 // verifying it: 62 //EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); 63 64 // Test embedded NULLs. 65 std::string utf8_null("a"); 66 utf8_null.push_back(0); 67 utf8_null.push_back('b'); 68 69 std::wstring expected_null(L"a"); 70 expected_null.push_back(0); 71 expected_null.push_back('b'); 72 73 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); 74} 75 76#if defined(OS_LINUX) // Tests depend on setting a specific Linux locale. 77namespace { 78 79class ScopedSetLocale { 80 public: 81 explicit ScopedSetLocale(const char* locale) { 82 old_locale_ = setlocale(LC_ALL, NULL); 83 setlocale(LC_ALL, locale); 84 } 85 ~ScopedSetLocale() { 86 setlocale(LC_ALL, old_locale_.c_str()); 87 } 88 89 private: 90 std::string old_locale_; 91}; 92 93} // namespace 94 95TEST(SysStrings, SysWideToNativeMB) { 96 using base::SysWideToNativeMB; 97 ScopedSetLocale locale("en_US.utf-8"); 98 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world")); 99 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d")); 100 101 // >16 bits 102 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA)); 103 104 // Error case. When Windows finds a UTF-16 character going off the end of 105 // a string, it just converts that literal value to UTF-8, even though this 106 // is invalid. 107 // 108 // This is what XP does, but Vista has different behavior, so we don't bother 109 // verifying it: 110 //EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", 111 // SysWideToNativeMB(L"\x4f60\xd800zyxw")); 112 113 // Test embedded NULLs. 
114 std::wstring wide_null(L"a"); 115 wide_null.push_back(0); 116 wide_null.push_back('b'); 117 118 std::string expected_null("a"); 119 expected_null.push_back(0); 120 expected_null.push_back('b'); 121 122 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null)); 123} 124 125// We assume the test is running in a UTF8 locale. 126TEST(SysStrings, SysNativeMBToWide) { 127 using base::SysNativeMBToWide; 128 ScopedSetLocale locale("en_US.utf-8"); 129 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world")); 130 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); 131 // >16 bits 132 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80")); 133 134 // Error case. When Windows finds an invalid UTF-8 character, it just skips 135 // it. This seems weird because it's inconsistent with the reverse conversion. 136 // 137 // This is what XP does, but Vista has different behavior, so we don't bother 138 // verifying it: 139 //EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); 140 141 // Test embedded NULLs. 142 std::string utf8_null("a"); 143 utf8_null.push_back(0); 144 utf8_null.push_back('b'); 145 146 std::wstring expected_null(L"a"); 147 expected_null.push_back(0); 148 expected_null.push_back('b'); 149 150 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null)); 151} 152 153static const wchar_t* const kConvertRoundtripCases[] = { 154 L"Google Video", 155 // "网页 图片 资讯更多 »" 156 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", 157 // "Παγκόσμιος Ιστός" 158 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 159 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", 160 // "Поиск страниц на русском" 161 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" 162 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" 163 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", 164 // "전체서비스" 165 L"\xc804\xccb4\xc11c\xbe44\xc2a4", 166 167 // Test characters that take more than 16 bits. 
This will depend on whether 168 // wchar_t is 16 or 32 bits. 169#if defined(WCHAR_T_IS_UTF16) 170 L"\xd800\xdf00", 171 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 172 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", 173#elif defined(WCHAR_T_IS_UTF32) 174 L"\x10300", 175 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 176 L"\x11d40\x11d41\x11d42\x11d43\x11d44", 177#endif 178}; 179 180 181TEST(SysStrings, SysNativeMBAndWide) { 182 ScopedSetLocale locale("en_US.utf-8"); 183 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 184 std::wstring wide = kConvertRoundtripCases[i]; 185 std::wstring trip = base::SysNativeMBToWide(base::SysWideToNativeMB(wide)); 186 EXPECT_EQ(wide.size(), trip.size()); 187 EXPECT_EQ(wide, trip); 188 } 189 190 // We assume our test is running in UTF-8, so double check through ICU. 191 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 192 std::wstring wide = kConvertRoundtripCases[i]; 193 std::wstring trip = base::SysNativeMBToWide(WideToUTF8(wide)); 194 EXPECT_EQ(wide.size(), trip.size()); 195 EXPECT_EQ(wide, trip); 196 } 197 198 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 199 std::wstring wide = kConvertRoundtripCases[i]; 200 std::wstring trip = UTF8ToWide(base::SysWideToNativeMB(wide)); 201 EXPECT_EQ(wide.size(), trip.size()); 202 EXPECT_EQ(wide, trip); 203 } 204} 205#endif // OS_LINUX 206