1c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes/*
2c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* Copyright (C) 2015 The Android Open Source Project
3c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes*
4c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* Licensed under the Apache License, Version 2.0 (the "License");
5c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* you may not use this file except in compliance with the License.
6c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* You may obtain a copy of the License at
7c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes*
8c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes*      http://www.apache.org/licenses/LICENSE-2.0
9c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes*
10c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* Unless required by applicable law or agreed to in writing, software
11c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* distributed under the License is distributed on an "AS IS" BASIS,
12c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* See the License for the specific language governing permissions and
14c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes* limitations under the License.
15c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes*/
16c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
174f71319df011d796a60a43fc1bc68e16fbf7d321Elliott Hughes#include "android-base/utf8.h"
18c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
19c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#include <gtest/gtest.h>
20c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
214f71319df011d796a60a43fc1bc68e16fbf7d321Elliott Hughes#include "android-base/macros.h"
22c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
23c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesnamespace android {
24c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesnamespace base {
25c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
26c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertInvalidUTF8) {
27c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::wstring wide;
28c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
29d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low  errno = 0;
30d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low
31c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Standalone \xa2 is an invalid UTF-8 sequence, so this should return an
32c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // error. Concatenate two C/C++ literal string constants to prevent the
33c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // compiler from giving an error about "\xa2af" containing a "hex escape
34c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // sequence out of range".
35c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_FALSE(android::base::UTF8ToWide("before\xa2" "after", &wide));
36c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
37d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low  EXPECT_EQ(EILSEQ, errno);
38d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low
39c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Even if an invalid character is encountered, UTF8ToWide() should still do
40c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // its best to convert the rest of the string. sysdeps_win32.cpp:
41c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // _console_write_utf8() depends on this behavior.
42c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  //
43c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Thus, we verify that the valid characters are converted, but we ignore the
44c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // specific replacement character that UTF8ToWide() may replace the invalid
45c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // UTF-8 characters with because we want to allow that to change if the
46c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // implementation changes.
47bac7bb5c16b6725436c61c6785701d9b51ed28f6Dan Albert  EXPECT_EQ(0U, wide.find(L"before"));
48c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  const wchar_t after_wide[] = L"after";
49c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(wide.length() - (arraysize(after_wide) - 1), wide.find(after_wide));
50c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
51c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
52c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/utf_string_conversions_unittest.cc
53c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
54c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Copyright (c) 2010 The Chromium Authors. All rights reserved.
55c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Use of this source code is governed by a BSD-style license that can be
56c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// found in the LICENSE file.
57c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
58c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The tests below from utf_string_conversions_unittest.cc check for this
59c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// preprocessor symbol, so define it, as it is appropriate for Windows.
60c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#define WCHAR_T_IS_UTF16
61c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic_assert(sizeof(wchar_t) == 2, "wchar_t is not 2 bytes");
62c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
63c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The tests below from utf_string_conversions_unittest.cc call versions of
64c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// UTF8ToWide() and WideToUTF8() that don't return success/failure, so these are
65c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// stub implementations with that signature. These are just for testing and
66c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// should not be moved to base because they assert/expect no errors which is
67c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// probably not a good idea (or at least it is something that should be left
68c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// up to the caller, not a base library).
69c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
70c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::wstring UTF8ToWide(const std::string& utf8) {
71c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::wstring utf16;
72c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_TRUE(UTF8ToWide(utf8, &utf16));
73c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  return utf16;
74c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
75c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
76c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::string WideToUTF8(const std::wstring& utf16) {
77c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::string utf8;
78c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_TRUE(WideToUTF8(utf16, &utf8));
79c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  return utf8;
80c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
81c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
namespace {

// Wide strings that ConvertUTF8AndWide converts to UTF-8 and back, expecting
// to get the original text again.
constexpr const wchar_t* kConvertRoundtripCases[] = {
  // Plain ASCII.
  L"Google Video",

  // Chinese: "网页 图片 资讯更多 »"
  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",

  // Greek: "Παγκόσμιος Ιστός"
  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",

  // Russian: "Поиск страниц на русском"
  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",

  // Korean: "전체서비스"
  L"\xc804\xccb4\xc11c\xbe44\xc2a4",

  // Code points above U+FFFF. Their spelling depends on whether wchar_t holds
  // UTF-16 code units (surrogate pairs) or UTF-32 code points.
#if defined(WCHAR_T_IS_UTF16)
  // U+10300 as a surrogate pair.
  L"\xd800\xdf00",
  // U+11D40 - U+11D44 as surrogate pairs. NOTE(review): the comment inherited
  // from Chromium called these "Mathematical Alphanumeric Symbols ... A,B,C,D,E",
  // but that block starts at U+1D400; the data is still valid non-BMP input.
  L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
#elif defined(WCHAR_T_IS_UTF32)
  // U+10300 as a single UTF-32 unit.
  L"\x10300",
  // U+11D40 - U+11D44 (see the NOTE(review) in the UTF-16 branch above).
  L"\x11d40\x11d41\x11d42\x11d43\x11d44",
#endif
};

}  // namespace
112c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
113c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF8AndWide) {
114c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // we round-trip all the wide strings through UTF-8 to make sure everything
115c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // agrees on the conversion. This uses the stream operators to test them
116c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // simultaneously.
117c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
118c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    std::ostringstream utf8;
119c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    utf8 << WideToUTF8(kConvertRoundtripCases[i]);
120c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    std::wostringstream wide;
121c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    wide << UTF8ToWide(utf8.str());
122c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
123c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    EXPECT_EQ(kConvertRoundtripCases[i], wide.str());
124c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  }
125c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
126c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
127c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF8AndWideEmptyString) {
128c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // An empty std::wstring should be converted to an empty std::string,
129c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // and vice versa.
130c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::wstring wempty;
131c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::string empty;
132c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(empty, WideToUTF8(wempty));
133c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(wempty, UTF8ToWide(empty));
134c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
135c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
136c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
137c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  struct UTF8ToWideCase {
138c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const char* utf8;
139c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const wchar_t* wide;
140c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    bool success;
141c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  } convert_cases[] = {
142c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Regular UTF-8 input.
143c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
144c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Non-character is passed through.
145c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"\xef\xbf\xbfHello", L"\xffffHello", true},
146c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Truncated UTF-8 sequence.
147c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
148c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Truncated off the end.
149c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false},
150c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Non-shortest-form UTF-8.
151c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
152c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
153c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Note that for whatever reason, this test fails on Windows XP.
154c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"\xed\xb0\x80", L"\xfffd", false},
155c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Non-BMP characters. The second is a non-character regarded as valid.
156c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // The result will either be in UTF-16 or UTF-32.
157c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if defined(WCHAR_T_IS_UTF16)
158c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
159c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},
160c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#elif defined(WCHAR_T_IS_UTF32)
161c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
162c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},
163c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif
164c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  };
165c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
166c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  for (size_t i = 0; i < arraysize(convert_cases); i++) {
167c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    std::wstring converted;
168d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low    errno = 0;
169c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const bool success = UTF8ToWide(convert_cases[i].utf8,
170c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes                                    strlen(convert_cases[i].utf8),
171c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes                                    &converted);
172c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    EXPECT_EQ(convert_cases[i].success, success);
173c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // The original test always compared expected and converted, but don't do
174c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // that because our implementation of UTF8ToWide() does not guarantee to
175c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // produce the same output in error situations.
176c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    if (success) {
177c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes      std::wstring expected(convert_cases[i].wide);
178c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes      EXPECT_EQ(expected, converted);
179d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low    } else {
180d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low      EXPECT_EQ(EILSEQ, errno);
181c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    }
182c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  }
183c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
184c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Manually test an embedded NULL.
185c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::wstring converted;
186c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted));
187c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  ASSERT_EQ(3U, converted.length());
188c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(static_cast<wchar_t>(0), converted[0]);
189c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ('Z', converted[1]);
190c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ('\t', converted[2]);
191c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
192c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Make sure that conversion replaces, not appends.
193c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_TRUE(UTF8ToWide("B", 1, &converted));
194c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  ASSERT_EQ(1U, converted.length());
195c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ('B', converted[0]);
196c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
197c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
198c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if defined(WCHAR_T_IS_UTF16)
199c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// This test is only valid when wchar_t == UTF-16.
200c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
201c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  struct WideToUTF8Case {
202c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const wchar_t* utf16;
203c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const char* utf8;
204c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    bool success;
205c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  } convert_cases[] = {
206c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Regular UTF-16 input.
207c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
208c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Test a non-BMP character.
209c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
210c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Non-characters are passed through.
211c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xffffHello", "\xEF\xBF\xBFHello", true},
212c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
213c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // The first character is a truncated UTF-16 character.
214c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Note that for whatever reason, this test fails on Windows XP.
215c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd",
216c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if (WINVER >= 0x0600)
217c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Only Vista and later has a new API/flag that correctly returns false.
218c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    false
219c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#else
220c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    true
221c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif
222c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    },
223c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Truncated at the end.
224c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Note that for whatever reason, this test fails on Windows XP.
225c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd",
226c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#if (WINVER >= 0x0600)
227c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Only Vista and later has a new API/flag that correctly returns false.
228c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    false
229c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#else
230c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    true
231c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif
232c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    },
233c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  };
234c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
235c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  for (size_t i = 0; i < arraysize(convert_cases); i++) {
236c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    std::string converted;
237d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low    errno = 0;
238c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const bool success = WideToUTF8(convert_cases[i].utf16,
239c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes                                    wcslen(convert_cases[i].utf16),
240c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes                                    &converted);
241c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    EXPECT_EQ(convert_cases[i].success, success);
242c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // The original test always compared expected and converted, but don't do
243c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // that because our implementation of WideToUTF8() does not guarantee to
244c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // produce the same output in error situations.
245c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    if (success) {
246c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes      std::string expected(convert_cases[i].utf8);
247c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes      EXPECT_EQ(expected, converted);
248d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low    } else {
249d21dc825bbecad6ce480c5e5c574cc77eadcd779Spencer Low      EXPECT_EQ(EILSEQ, errno);
250c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    }
251c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  }
252c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
253c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
254c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#elif defined(WCHAR_T_IS_UTF32)
255c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// This test is only valid when wchar_t == UTF-32.
256c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) {
257c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  struct WideToUTF8Case {
258c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const wchar_t* utf32;
259c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    const char* utf8;
260c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    bool success;
261c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  } convert_cases[] = {
262c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Regular 16-bit input.
263c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
264c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Test a non-BMP character.
265c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
266c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Non-characters are passed through.
267c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xffffHello", "\xEF\xBF\xBFHello", true},
268c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
269c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // Invalid Unicode code points.
270c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xfffffffHello", "\xEF\xBF\xBDHello", false},
271c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    // The first character is a truncated UTF-16 character.
272c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},
273c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    {L"\xdc01Hello", "\xef\xbf\xbdHello", false},
274c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  };
275c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
276c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  for (size_t i = 0; i < arraysize(convert_cases); i++) {
277c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    std::string converted;
278c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    EXPECT_EQ(convert_cases[i].success,
279c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes              WideToUTF8(convert_cases[i].utf32,
280c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes                         wcslen(convert_cases[i].utf32),
281c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes                         &converted));
282c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    std::string expected(convert_cases[i].utf8);
283c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    EXPECT_EQ(expected, converted);
284c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  }
285c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
286c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes#endif  // defined(WCHAR_T_IS_UTF32)
287c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
288c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The test below uses these types and functions, so just do enough to get the
289c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// test running.
// Aliases for the Chromium type names used by ConvertMultiString; both simply
// map onto the platform's wide character and wide string types.
using char16 = wchar_t;
using string16 = std::wstring;
292c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
// Resizes the string |*t| to hold |size| - 1 characters and returns a pointer
// to its first character so that the caller can fill the storage directly.
// |size| counts the terminating NUL; it is subtracted here because the string
// already provides room for its own terminator beyond the requested length.
template<typename T>
static void* WriteInto(T* t, size_t size) {
  T& target = *t;
  const size_t character_count = size - 1;
  target.resize(character_count);
  return &target[0];
}
299c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
300c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// A stub implementation that calls a helper from above, just to get the test
301c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// below working. This is just for testing and should not be moved to base
302c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// because this ignores errors which is probably not a good idea, plus it takes
303c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// a string16 type which we don't really have.
304c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::string UTF16ToUTF8(const string16& utf16) {
305c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  return WideToUTF8(utf16);
306c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
307c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
308c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(UTFStringConversionsTest, ConvertMultiString) {
309c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  static char16 multi16[] = {
310c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    'f', 'o', 'o', '\0',
311c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    'b', 'a', 'r', '\0',
312c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    'b', 'a', 'z', '\0',
313c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    '\0'
314c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  };
315c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  static char multi[] = {
316c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    'f', 'o', 'o', '\0',
317c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    'b', 'a', 'r', '\0',
318c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    'b', 'a', 'z', '\0',
319c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes    '\0'
320c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  };
321c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  string16 multistring16;
322c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  memcpy(WriteInto(&multistring16, arraysize(multi16)), multi16,
323c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes                   sizeof(multi16));
324c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(arraysize(multi16) - 1, multistring16.length());
325c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::string expected;
326c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi));
327c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(arraysize(multi) - 1, expected.length());
328c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  const std::string& converted = UTF16ToUTF8(multistring16);
329c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(arraysize(multi) - 1, converted.length());
330c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(expected, converted);
331c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
332c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
333c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// The tests below from sys_string_conversions_unittest.cc call SysWideToUTF8()
334c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// and SysUTF8ToWide(), so these are stub implementations that call the helpers
335c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// above. These are just for testing and should not be moved to base because
336c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// they ignore errors which is probably not a good idea.
337c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
338c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::string SysWideToUTF8(const std::wstring& utf16) {
339c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  return WideToUTF8(utf16);
340c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
341c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
342c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughesstatic std::wstring SysUTF8ToWide(const std::string& utf8) {
343c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  return UTF8ToWide(utf8);
344c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
345c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
346c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/sys_string_conversions_unittest.cc
347c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
348c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Copyright (c) 2011 The Chromium Authors. All rights reserved.
349c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// Use of this source code is governed by a BSD-style license that can be
350c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes// found in the LICENSE file.
351c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
// Wide-string spelling of U+10300, the ">16 bits" code point used by the
// SysStrings tests: one wchar_t when WCHAR_T_IS_UTF32 is defined, otherwise
// the pair of 16-bit units 0xD800 0xDF00.
#if defined(WCHAR_T_IS_UTF32)
static const std::wstring kSysWideOldItalicLetterA(L"\x10300");
#else
static const std::wstring kSysWideOldItalicLetterA(L"\xd800\xdf00");
#endif
357c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
358c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(SysStrings, SysWideToUTF8) {
359c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world"));
360c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d"));
361c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
362c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // >16 bits
363c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA));
364c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
365c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Error case. When Windows finds a UTF-16 character going off the end of
366c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // a string, it just converts that literal value to UTF-8, even though this
367c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // is invalid.
368c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  //
369c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // This is what XP does, but Vista has different behavior, so we don't bother
370c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // verifying it:
371c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",
372c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  //           SysWideToUTF8(L"\x4f60\xd800zyxw"));
373c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
374c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Test embedded NULLs.
375c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::wstring wide_null(L"a");
376c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  wide_null.push_back(0);
377c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  wide_null.push_back('b');
378c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
379c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::string expected_null("a");
380c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  expected_null.push_back(0);
381c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  expected_null.push_back('b');
382c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
383c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(expected_null, SysWideToUTF8(wide_null));
384c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
385c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
386c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott HughesTEST(SysStrings, SysUTF8ToWide) {
387c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world"));
388c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));
389c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // >16 bits
390c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80"));
391c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
392c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Error case. When Windows finds an invalid UTF-8 character, it just skips
393c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // it. This seems weird because it's inconsistent with the reverse conversion.
394c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  //
395c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // This is what XP does, but Vista has different behavior, so we don't bother
396c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // verifying it:
397c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));
398c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
399c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  // Test embedded NULLs.
400c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::string utf8_null("a");
401c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  utf8_null.push_back(0);
402c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  utf8_null.push_back('b');
403c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
404c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  std::wstring expected_null(L"a");
405c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  expected_null.push_back(0);
406c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  expected_null.push_back('b');
407c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
408c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes  EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null));
409c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}
410c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes
411c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}  // namespace base
412c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3Elliott Hughes}  // namespace android
413