1c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes/* 2c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* Copyright (C) 2015 The Android Open Source Project 3c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* 4c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* Licensed under the Apache License, Version 2.0 (the "License"); 5c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* you may not use this file except in compliance with the License. 6c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* You may obtain a copy of the License at 7c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* 8c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* http://www.apache.org/licenses/LICENSE-2.0 9c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* 10c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* Unless required by applicable law or agreed to in writing, software 11c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* distributed under the License is distributed on an "AS IS" BASIS, 12c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* See the License for the specific language governing permissions and 14c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* limitations under the License. 15c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes*/ 16c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 174f71319df011d796a60a43fc1bc68e16fbf7d321Elliott Hughes#include "android-base/utf8.h" 18c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 19c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#include <gtest/gtest.h> 20c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 214f71319df011d796a60a43fc1bc68e16fbf7d321Elliott Hughes#include "android-base/macros.h" 22c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 23c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesnamespace android { 24c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesnamespace base { 25c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 26c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertInvalidUTF8) { 27c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring wide; 28c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 29d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low errno = 0; 30d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low 31c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Standalone \xa2 is an invalid UTF-8 sequence, so this should return an 32c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // error. Concatenate two C/C++ literal string constants to prevent the 33c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // compiler from giving an error about "\xa2af" containing a "hex escape 34c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // sequence out of range". 35c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_FALSE(android::base::UTF8ToWide("before\xa2" "after", &wide)); 36c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 37d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low EXPECT_EQ(EILSEQ, errno); 38d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low 39c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Even if an invalid character is encountered, UTF8ToWide() should still do 40c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // its best to convert the rest of the string. sysdeps_win32.cpp: 41c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // _console_write_utf8() depends on this behavior. 42c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // 43c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Thus, we verify that the valid characters are converted, but we ignore the 44c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // specific replacement character that UTF8ToWide() may replace the invalid 45c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // UTF-8 characters with because we want to allow that to change if the 46c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // implementation changes. 47bac7bb5c16b6725436c61c6785701d9b51ed28f6Dan Albert EXPECT_EQ(0U, wide.find(L"before")); 48c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const wchar_t after_wide[] = L"after"; 49c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(wide.length() - (arraysize(after_wide) - 1), wide.find(after_wide)); 50c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 51c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 52c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/utf_string_conversions_unittest.cc 53c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 54c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Copyright (c) 2010 The Chromium Authors. All rights reserved. 55c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Use of this source code is governed by a BSD-style license that can be 56c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// found in the LICENSE file. 57c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 58c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The tests below from utf_string_conversions_unittest.cc check for this 59c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// preprocessor symbol, so define it, as it is appropriate for Windows. 60c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#define WCHAR_T_IS_UTF16 61c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic_assert(sizeof(wchar_t) == 2, "wchar_t is not 2 bytes"); 62c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 63c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The tests below from utf_string_conversions_unittest.cc call versions of 64c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// UTF8ToWide() and WideToUTF8() that don't return success/failure, so these are 65c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// stub implementations with that signature. These are just for testing and 66c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// should not be moved to base because they assert/expect no errors which is 67c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// probably not a good idea (or at least it is something that should be left 68c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// up to the caller, not a base library). 69c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 70c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::wstring UTF8ToWide(const std::string& utf8) { 71c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring utf16; 72c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_TRUE(UTF8ToWide(utf8, &utf16)); 73c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes return utf16; 74c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 75c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 76c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::string WideToUTF8(const std::wstring& utf16) { 77c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string utf8; 78c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_TRUE(WideToUTF8(utf16, &utf8)); 79c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes return utf8; 80c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 81c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 82c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesnamespace { 83c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 84c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesconst wchar_t* const kConvertRoundtripCases[] = { 85c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"Google Video", 86c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // "网页 图片 资讯更多 »" 87c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", 88c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // "Παγκόσμιος Ιστός" 89c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 90c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", 91c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // "Поиск страниц на русском" 92c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" 93c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" 94c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", 95c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // "전체서비스" 96c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\xc804\xccb4\xc11c\xbe44\xc2a4", 97c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 98c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Test characters that take more than 16 bits. This will depend on whether 99c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // wchar_t is 16 or 32 bits. 100c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if defined(WCHAR_T_IS_UTF16) 101c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\xd800\xdf00", 102c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 103c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", 104c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#elif defined(WCHAR_T_IS_UTF32) 105c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x10300", 106c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 107c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes L"\x11d40\x11d41\x11d42\x11d43\x11d44", 108c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif 109c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}; 110c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 111c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} // namespace 112c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 113c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF8AndWide) { 114c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // we round-trip all the wide strings through UTF-8 to make sure everything 115c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // agrees on the conversion. This uses the stream operators to test them 116c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // simultaneously. 117c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 118c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::ostringstream utf8; 119c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes utf8 << WideToUTF8(kConvertRoundtripCases[i]); 120c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wostringstream wide; 121c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes wide << UTF8ToWide(utf8.str()); 122c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 123c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(kConvertRoundtripCases[i], wide.str()); 124c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } 125c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 126c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 127c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF8AndWideEmptyString) { 128c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // An empty std::wstring should be converted to an empty std::string, 129c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // and vice versa. 130c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring wempty; 131c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string empty; 132c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(empty, WideToUTF8(wempty)); 133c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(wempty, UTF8ToWide(empty)); 134c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 135c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 136c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF8ToWide) { 137c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes struct UTF8ToWideCase { 138c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const char* utf8; 139c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const wchar_t* wide; 140c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes bool success; 141c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } convert_cases[] = { 142c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Regular UTF-8 input. 143c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, 144c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Non-character is passed through. 145c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"\xef\xbf\xbfHello", L"\xffffHello", true}, 146c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Truncated UTF-8 sequence. 147c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, 148c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Truncated off the end. 149c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false}, 150c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Non-shortest-form UTF-8. 151c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, 152c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. 153c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Note that for whatever reason, this test fails on Windows XP. 154c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"\xed\xb0\x80", L"\xfffd", false}, 155c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Non-BMP characters. The second is a non-character regarded as valid. 156c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // The result will either be in UTF-16 or UTF-32. 157c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if defined(WCHAR_T_IS_UTF16) 158c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, 159c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true}, 160c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#elif defined(WCHAR_T_IS_UTF32) 161c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, 162c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true}, 163c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif 164c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes }; 165c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 166c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes for (size_t i = 0; i < arraysize(convert_cases); i++) { 167c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring converted; 168d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low errno = 0; 169c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const bool success = UTF8ToWide(convert_cases[i].utf8, 170c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes strlen(convert_cases[i].utf8), 171c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes &converted); 172c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(convert_cases[i].success, success); 173c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // The original test always compared expected and converted, but don't do 174c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // that because our implementation of UTF8ToWide() does not guarantee to 175c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // produce the same output in error situations. 176c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes if (success) { 177c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring expected(convert_cases[i].wide); 178c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(expected, converted); 179d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low } else { 180d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low EXPECT_EQ(EILSEQ, errno); 181c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } 182c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } 183c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 184c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Manually test an embedded NULL. 185c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring converted; 186c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted)); 187c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes ASSERT_EQ(3U, converted.length()); 188c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(static_cast<wchar_t>(0), converted[0]); 189c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ('Z', converted[1]); 190c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ('\t', converted[2]); 191c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 192c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Make sure that conversion replaces, not appends. 193c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_TRUE(UTF8ToWide("B", 1, &converted)); 194c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes ASSERT_EQ(1U, converted.length()); 195c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ('B', converted[0]); 196c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 197c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 198c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if defined(WCHAR_T_IS_UTF16) 199c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// This test is only valid when wchar_t == UTF-16. 200c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) { 201c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes struct WideToUTF8Case { 202c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const wchar_t* utf16; 203c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const char* utf8; 204c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes bool success; 205c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } convert_cases[] = { 206c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Regular UTF-16 input. 207c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, 208c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Test a non-BMP character. 209c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, 210c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Non-characters are passed through. 211c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xffffHello", "\xEF\xBF\xBFHello", true}, 212c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, 213c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // The first character is a truncated UTF-16 character. 214c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Note that for whatever reason, this test fails on Windows XP. 215c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", 216c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if (WINVER >= 0x0600) 217c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Only Vista and later has a new API/flag that correctly returns false. 218c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes false 219c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#else 220c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes true 221c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif 222c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes }, 223c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Truncated at the end. 224c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Note that for whatever reason, this test fails on Windows XP. 225c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", 226c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if (WINVER >= 0x0600) 227c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Only Vista and later has a new API/flag that correctly returns false. 228c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes false 229c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#else 230c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes true 231c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif 232c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes }, 233c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes }; 234c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 235c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes for (size_t i = 0; i < arraysize(convert_cases); i++) { 236c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string converted; 237d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low errno = 0; 238c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const bool success = WideToUTF8(convert_cases[i].utf16, 239c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes wcslen(convert_cases[i].utf16), 240c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes &converted); 241c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(convert_cases[i].success, success); 242c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // The original test always compared expected and converted, but don't do 243c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // that because our implementation of WideToUTF8() does not guarantee to 244c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // produce the same output in error situations. 245c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes if (success) { 246c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string expected(convert_cases[i].utf8); 247c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(expected, converted); 248d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low } else { 249d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low EXPECT_EQ(EILSEQ, errno); 250c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } 251c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } 252c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 253c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 254c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#elif defined(WCHAR_T_IS_UTF32) 255c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// This test is only valid when wchar_t == UTF-32. 256c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { 257c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes struct WideToUTF8Case { 258c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const wchar_t* utf32; 259c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const char* utf8; 260c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes bool success; 261c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } convert_cases[] = { 262c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Regular 16-bit input. 263c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, 264c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Test a non-BMP character. 265c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, 266c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Non-characters are passed through. 267c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xffffHello", "\xEF\xBF\xBFHello", true}, 268c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, 269c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Invalid Unicode code points. 270c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xfffffffHello", "\xEF\xBF\xBDHello", false}, 271c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // The first character is a truncated UTF-16 character. 272c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, 273c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes {L"\xdc01Hello", "\xef\xbf\xbdHello", false}, 274c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes }; 275c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 276c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes for (size_t i = 0; i < arraysize(convert_cases); i++) { 277c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string converted; 278c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(convert_cases[i].success, 279c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes WideToUTF8(convert_cases[i].utf32, 280c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes wcslen(convert_cases[i].utf32), 281c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes &converted)); 282c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string expected(convert_cases[i].utf8); 283c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(expected, converted); 284c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes } 285c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 286c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif // defined(WCHAR_T_IS_UTF32) 287c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 288c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The test below uses these types and functions, so just do enough to get the 289c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// test running. 290c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughestypedef wchar_t char16; 291c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughestypedef std::wstring string16; 292c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 293c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughestemplate<typename T> 294c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic void* WriteInto(T* t, size_t size) { 295c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // std::(w)string::resize() already includes space for a NULL terminator. 296c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes t->resize(size - 1); 297c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes return &(*t)[0]; 298c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 299c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 300c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// A stub implementation that calls a helper from above, just to get the test 301c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// below working. This is just for testing and should not be moved to base 302c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// because this ignores errors which is probably not a good idea, plus it takes 303c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// a string16 type which we don't really have. 304c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::string UTF16ToUTF8(const string16& utf16) { 305c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes return WideToUTF8(utf16); 306c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 307c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 308c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertMultiString) { 309c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes static char16 multi16[] = { 310c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 'f', 'o', 'o', '\0', 311c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 'b', 'a', 'r', '\0', 312c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 'b', 'a', 'z', '\0', 313c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes '\0' 314c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes }; 315c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes static char multi[] = { 316c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 'f', 'o', 'o', '\0', 317c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 'b', 'a', 'r', '\0', 318c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 'b', 'a', 'z', '\0', 319c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes '\0' 320c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes }; 321c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes string16 multistring16; 322c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes memcpy(WriteInto(&multistring16, arraysize(multi16)), multi16, 323c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes sizeof(multi16)); 324c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(arraysize(multi16) - 1, multistring16.length()); 325c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string expected; 326c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); 327c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(arraysize(multi) - 1, expected.length()); 328c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes const std::string& converted = UTF16ToUTF8(multistring16); 329c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(arraysize(multi) - 1, converted.length()); 330c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(expected, converted); 331c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 332c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 333c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The tests below from sys_string_conversions_unittest.cc call SysWideToUTF8() 334c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// and SysUTF8ToWide(), so these are stub implementations that call the helpers 335c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// above. These are just for testing and should not be moved to base because 336c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// they ignore errors which is probably not a good idea. 337c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 338c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::string SysWideToUTF8(const std::wstring& utf16) { 339c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes return WideToUTF8(utf16); 340c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 341c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 342c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::wstring SysUTF8ToWide(const std::string& utf8) { 343c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes return UTF8ToWide(utf8); 344c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 345c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 346c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/sys_string_conversions_unittest.cc 347c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 348c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Copyright (c) 2011 The Chromium Authors. All rights reserved. 349c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Use of this source code is governed by a BSD-style license that can be 350c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// found in the LICENSE file. 351c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 352c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#ifdef WCHAR_T_IS_UTF32 353c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic const std::wstring kSysWideOldItalicLetterA = L"\x10300"; 354c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#else 355c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00"; 356c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif 357c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 358c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(SysStrings, SysWideToUTF8) { 359c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); 360c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); 361c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 362c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // >16 bits 363c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA)); 364c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 365c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Error case. When Windows finds a UTF-16 character going off the end of 366c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // a string, it just converts that literal value to UTF-8, even though this 367c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // is invalid. 368c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // 369c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // This is what XP does, but Vista has different behavior, so we don't bother 370c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // verifying it: 371c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", 372c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // SysWideToUTF8(L"\x4f60\xd800zyxw")); 373c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 374c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Test embedded NULLs. 375c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring wide_null(L"a"); 376c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes wide_null.push_back(0); 377c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes wide_null.push_back('b'); 378c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 379c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string expected_null("a"); 380c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes expected_null.push_back(0); 381c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes expected_null.push_back('b'); 382c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 383c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); 384c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 385c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 386c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(SysStrings, SysUTF8ToWide) { 387c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); 388c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); 389c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // >16 bits 390c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80")); 391c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 392c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Error case. When Windows finds an invalid UTF-8 character, it just skips 393c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // it. This seems weird because it's inconsistent with the reverse conversion. 394c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // 395c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // This is what XP does, but Vista has different behavior, so we don't bother 396c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // verifying it: 397c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); 398c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 399c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes // Test embedded NULLs. 400c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::string utf8_null("a"); 401c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes utf8_null.push_back(0); 402c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes utf8_null.push_back('b'); 403c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 404c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes std::wstring expected_null(L"a"); 405c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes expected_null.push_back(0); 406c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes expected_null.push_back('b'); 407c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 408c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); 409c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} 410c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes 411c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} // namespace base 412c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes} // namespace android 413