1010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Copyright 2014 The Chromium Authors. All rights reserved. 2010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// found in the LICENSE file. 4010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 5010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "base/macros.h" 6010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "testing/gtest/include/gtest/gtest.h" 7010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "third_party/icu/source/common/unicode/ucnv.h" 8010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "url/url_canon.h" 9010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "url/url_canon_icu.h" 10010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "url/url_canon_stdstring.h" 11010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include "url/url_test_utils.h" 12010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 13010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Some implementations of base/basictypes.h may define ARRAYSIZE. 14010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro 15010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// which is in our version of basictypes.h. 16010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#ifndef ARRAYSIZE 17010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#define ARRAYSIZE ARRAYSIZE_UNSAFE 18010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#endif 19010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 20010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)namespace url { 21010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 22010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)using test_utils::WStringToUTF16; 23010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 24010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)namespace { 25010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 26010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Wrapper around a UConverter object that managers creation and destruction. 27010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)class UConvScoper { 28010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) public: 29010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) explicit UConvScoper(const char* charset_name) { 30010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) UErrorCode err = U_ZERO_ERROR; 31010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) converter_ = ucnv_open(charset_name, &err); 32010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 33010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 34010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ~UConvScoper() { 35010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) if (converter_) 36010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ucnv_close(converter_); 37010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 38010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 39010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Returns the converter object, may be NULL. 40010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) UConverter* converter() const { return converter_; } 41010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 42010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) private: 43010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) UConverter* converter_; 44010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)}; 45010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 46010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)TEST(URLCanonIcuTest, ICUCharsetConverter) { 47010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) struct ICUCase { 48010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const wchar_t* input; 49010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const char* encoding; 50010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const char* expected; 51010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } icu_cases[] = { 52010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // UTF-8. 53010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {L"Hello, world", "utf-8", "Hello, world"}, 54010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"}, 55010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Non-BMP UTF-8. 56010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"}, 57010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Big5 58010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"}, 59010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Unrepresentable character in the destination set. 60010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {L"hello\x4f60\x06de\x597dworld", "big5", 61010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) "hello\xa7\x41%26%231758%3B\xa6\x6eworld"}, 62010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) }; 63010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 64010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) { 65010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) UConvScoper conv(icu_cases[i].encoding); 66010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ASSERT_TRUE(conv.converter() != NULL); 67010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ICUCharsetConverter converter(conv.converter()); 68010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 69010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) std::string str; 70010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StdStringCanonOutput output(&str); 71010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 72010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) base::string16 input_str(WStringToUTF16(icu_cases[i].input)); 73010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) int input_len = static_cast<int>(input_str.length()); 74010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); 75010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) output.Complete(); 76010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 77010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) EXPECT_STREQ(icu_cases[i].expected, str.c_str()); 78010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 79010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 80010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Test string sizes around the resize boundary for the output to make sure 81010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // the converter resizes as needed. 82010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const int static_size = 16; 83010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) UConvScoper conv("utf-8"); 84010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ASSERT_TRUE(conv.converter()); 85010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ICUCharsetConverter converter(conv.converter()); 86010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) for (int i = static_size - 2; i <= static_size + 2; i++) { 87010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Make a string with the appropriate length. 88010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) base::string16 input; 89010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) for (int ch = 0; ch < i; ch++) 90010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) input.push_back('a'); 91010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 92010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) RawCanonOutput<static_size> output; 93010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()), 94010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) &output); 95010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) EXPECT_EQ(input.length(), static_cast<size_t>(output.length())); 96010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 97010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)} 98010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 99010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)TEST(URLCanonIcuTest, QueryWithConverter) { 100010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) struct QueryCase { 101010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const char* input8; 102010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const wchar_t* input16; 103010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const char* encoding; 104010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const char* expected; 105010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } query_cases[] = { 106010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Regular ASCII case in some different encodings. 107010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"}, 108010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"}, 109010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"}, 110010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Chinese input/output 111010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", 112010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) "?q=%C4%E3%BA%C3"}, 113010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"}, 114010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Unencodable character in the destination character set should be 115010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // escaped. The escape sequence unescapes to be the entity name: 116010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // "?q=你" 117010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", 118010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) "?q=Chinese%26%2365319%3B"}, 119010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) }; 120010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 121010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) { 122010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) Component out_comp; 123010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 124010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) UConvScoper conv(query_cases[i].encoding); 125010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ASSERT_TRUE(!query_cases[i].encoding || conv.converter()); 126010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ICUCharsetConverter converter(conv.converter()); 127010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 128010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) if (query_cases[i].input8) { 129010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) int len = static_cast<int>(strlen(query_cases[i].input8)); 130010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) Component in_comp(0, len); 131010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) std::string out_str; 132010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 133010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StdStringCanonOutput output(&out_str); 134010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output, 135010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) &out_comp); 136010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) output.Complete(); 137010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 138010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) EXPECT_EQ(query_cases[i].expected, out_str); 139010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 140010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 141010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) if (query_cases[i].input16) { 142010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) base::string16 input16(WStringToUTF16(query_cases[i].input16)); 143010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) int len = static_cast<int>(input16.length()); 144010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) Component in_comp(0, len); 145010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) std::string out_str; 146010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 147010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StdStringCanonOutput output(&out_str); 148010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output, 149010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) &out_comp); 150010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) output.Complete(); 151010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 152010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) EXPECT_EQ(query_cases[i].expected, out_str); 153010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 154010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 155010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 156010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Extra test for input with embedded NULL; 157010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) std::string out_str; 158010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StdStringCanonOutput output(&out_str); 159010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) Component out_comp; 160010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp); 161010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) output.Complete(); 162010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) EXPECT_EQ("?a%20%00z%01", out_str); 163010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)} 164010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 165010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)} // namespace 166010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 167010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)} // namespace url 168