1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "testing/gtest/include/gtest/gtest.h" 6#include "url/url_canon.h" 7#include "url/url_canon_stdstring.h" 8#include "url/url_parse.h" 9#include "url/url_test_utils.h" 10#include "url/url_util.h" 11 12TEST(URLUtilTest, FindAndCompareScheme) { 13 url_parse::Component found_scheme; 14 15 // Simple case where the scheme is found and matches. 16 const char kStr1[] = "http://www.com/"; 17 EXPECT_TRUE(url_util::FindAndCompareScheme( 18 kStr1, static_cast<int>(strlen(kStr1)), "http", NULL)); 19 EXPECT_TRUE(url_util::FindAndCompareScheme( 20 kStr1, static_cast<int>(strlen(kStr1)), "http", &found_scheme)); 21 EXPECT_TRUE(found_scheme == url_parse::Component(0, 4)); 22 23 // A case where the scheme is found and doesn't match. 24 EXPECT_FALSE(url_util::FindAndCompareScheme( 25 kStr1, static_cast<int>(strlen(kStr1)), "https", &found_scheme)); 26 EXPECT_TRUE(found_scheme == url_parse::Component(0, 4)); 27 28 // A case where there is no scheme. 29 const char kStr2[] = "httpfoobar"; 30 EXPECT_FALSE(url_util::FindAndCompareScheme( 31 kStr2, static_cast<int>(strlen(kStr2)), "http", &found_scheme)); 32 EXPECT_TRUE(found_scheme == url_parse::Component()); 33 34 // When there is an empty scheme, it should match the empty scheme. 35 const char kStr3[] = ":foo.com/"; 36 EXPECT_TRUE(url_util::FindAndCompareScheme( 37 kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme)); 38 EXPECT_TRUE(found_scheme == url_parse::Component(0, 0)); 39 40 // But when there is no scheme, it should fail. 41 EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme)); 42 EXPECT_TRUE(found_scheme == url_parse::Component()); 43 44 // When there is a whitespace char in scheme, it should canonicalize the url 45 // before comparison. 46 const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; 47 EXPECT_TRUE(url_util::FindAndCompareScheme( 48 whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript", 49 &found_scheme)); 50 EXPECT_TRUE(found_scheme == url_parse::Component(1, 10)); 51 52 // Control characters should be stripped out on the ends, and kept in the 53 // middle. 54 const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; 55 EXPECT_FALSE(url_util::FindAndCompareScheme( 56 ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript", 57 &found_scheme)); 58 EXPECT_TRUE(found_scheme == url_parse::Component(1, 11)); 59} 60 61TEST(URLUtilTest, ReplaceComponents) { 62 url_parse::Parsed parsed; 63 url_canon::RawCanonOutputT<char> output; 64 url_parse::Parsed new_parsed; 65 66 // Check that the following calls do not cause crash 67 url_canon::Replacements<char> replacements; 68 replacements.SetRef("test", url_parse::Component(0, 4)); 69 url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, 70 &new_parsed); 71 url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, 72 &new_parsed); 73 replacements.ClearRef(); 74 replacements.SetHost("test", url_parse::Component(0, 4)); 75 url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, 76 &new_parsed); 77 url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, 78 &new_parsed); 79 80 replacements.ClearHost(); 81 url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, 82 &new_parsed); 83 url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, 84 &new_parsed); 85 url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, 86 &new_parsed); 87 url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output, 88 &new_parsed); 89} 90 91static std::string CheckReplaceScheme(const char* base_url, 92 const char* scheme) { 93 // Make sure the input is canonicalized. 94 url_canon::RawCanonOutput<32> original; 95 url_parse::Parsed original_parsed; 96 url_util::Canonicalize(base_url, strlen(base_url), NULL, 97 &original, &original_parsed); 98 99 url_canon::Replacements<char> replacements; 100 replacements.SetScheme(scheme, url_parse::Component(0, strlen(scheme))); 101 102 std::string output_string; 103 url_canon::StdStringCanonOutput output(&output_string); 104 url_parse::Parsed output_parsed; 105 url_util::ReplaceComponents(original.data(), original.length(), 106 original_parsed, replacements, NULL, 107 &output, &output_parsed); 108 109 output.Complete(); 110 return output_string; 111} 112 113TEST(URLUtilTest, ReplaceScheme) { 114 EXPECT_EQ("https://google.com/", 115 CheckReplaceScheme("http://google.com/", "https")); 116 EXPECT_EQ("file://google.com/", 117 CheckReplaceScheme("http://google.com/", "file")); 118 EXPECT_EQ("http://home/Build", 119 CheckReplaceScheme("file:///Home/Build", "http")); 120 EXPECT_EQ("javascript:foo", 121 CheckReplaceScheme("about:foo", "javascript")); 122 EXPECT_EQ("://google.com/", 123 CheckReplaceScheme("http://google.com/", "")); 124 EXPECT_EQ("http://google.com/", 125 CheckReplaceScheme("about:google.com", "http")); 126 EXPECT_EQ("http:", CheckReplaceScheme("", "http")); 127 128#ifdef WIN32 129 // Magic Windows drive letter behavior when converting to a file URL. 130 EXPECT_EQ("file:///E:/foo/", 131 CheckReplaceScheme("http://localhost/e:foo/", "file")); 132#endif 133 134 // This will probably change to "about://google.com/" when we fix 135 // http://crbug.com/160 which should also be an acceptable result. 136 EXPECT_EQ("about://google.com/", 137 CheckReplaceScheme("http://google.com/", "about")); 138} 139 140TEST(URLUtilTest, DecodeURLEscapeSequences) { 141 struct DecodeCase { 142 const char* input; 143 const char* output; 144 } decode_cases[] = { 145 {"hello, world", "hello, world"}, 146 {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/", 147 "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"}, 148 {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/", 149 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"}, 150 {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/", 151 " !\"#$%&'()*+,-.//"}, 152 {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", 153 "0123456789:;<=>?/"}, 154 {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", 155 "@ABCDEFGHIJKLMNO/"}, 156 {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/", 157 "PQRSTUVWXYZ[\\]^_/"}, 158 {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", 159 "`abcdefghijklmno/"}, 160 {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", 161 "pqrstuvwxyz{|}~\x7f/"}, 162 // Test un-UTF-8-ization. 163 {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"}, 164 }; 165 166 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) { 167 const char* input = decode_cases[i].input; 168 url_canon::RawCanonOutputT<base::char16> output; 169 url_util::DecodeURLEscapeSequences(input, strlen(input), &output); 170 EXPECT_EQ(decode_cases[i].output, 171 url_test_utils::ConvertUTF16ToUTF8( 172 base::string16(output.data(), output.length()))); 173 } 174 175 // Our decode should decode %00 176 const char zero_input[] = "%00"; 177 url_canon::RawCanonOutputT<base::char16> zero_output; 178 url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input), 179 &zero_output); 180 EXPECT_NE("%00", 181 url_test_utils::ConvertUTF16ToUTF8( 182 base::string16(zero_output.data(), zero_output.length()))); 183 184 // Test the error behavior for invalid UTF-8. 185 const char invalid_input[] = "%e4%a0%e5%a5%bd"; 186 const base::char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0}; 187 url_canon::RawCanonOutputT<base::char16> invalid_output; 188 url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), 189 &invalid_output); 190 EXPECT_EQ(base::string16(invalid_expected), 191 base::string16(invalid_output.data(), invalid_output.length())); 192} 193 194TEST(URLUtilTest, TestEncodeURIComponent) { 195 struct EncodeCase { 196 const char* input; 197 const char* output; 198 } encode_cases[] = { 199 {"hello, world", "hello%2C%20world"}, 200 {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", 201 "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"}, 202 {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", 203 "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"}, 204 {" !\"#$%&'()*+,-./", 205 "%20!%22%23%24%25%26'()*%2B%2C-.%2F"}, 206 {"0123456789:;<=>?", 207 "0123456789%3A%3B%3C%3D%3E%3F"}, 208 {"@ABCDEFGHIJKLMNO", 209 "%40ABCDEFGHIJKLMNO"}, 210 {"PQRSTUVWXYZ[\\]^_", 211 "PQRSTUVWXYZ%5B%5C%5D%5E_"}, 212 {"`abcdefghijklmno", 213 "%60abcdefghijklmno"}, 214 {"pqrstuvwxyz{|}~\x7f", 215 "pqrstuvwxyz%7B%7C%7D~%7F"}, 216 }; 217 218 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(encode_cases); i++) { 219 const char* input = encode_cases[i].input; 220 url_canon::RawCanonOutputT<char> buffer; 221 url_util::EncodeURIComponent(input, strlen(input), &buffer); 222 std::string output(buffer.data(), buffer.length()); 223 EXPECT_EQ(encode_cases[i].output, output); 224 } 225} 226 227TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) { 228 // This tests non-standard (in the sense that GURL::IsStandard() == false) 229 // hierarchical schemes. 230 struct ResolveRelativeCase { 231 const char* base; 232 const char* rel; 233 bool is_valid; 234 const char* out; 235 } resolve_non_standard_cases[] = { 236 // Resolving a relative path against a non-hierarchical URL should fail. 237 {"scheme:opaque_data", "/path", false, ""}, 238 // Resolving a relative path against a non-standard authority-based base 239 // URL doesn't alter the authority section. 240 {"scheme://Authority/", "../path", true, "scheme://Authority/path"}, 241 // A non-standard hierarchical base is resolved with path URL 242 // canoncialization rules. 243 {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"}, 244 {"data:/Path/../part/part2", "file.html", true, "data:/Path/../part/file.html"}, 245 // Path URL canonicalization rules also apply to non-standard authority- 246 // based URLs. 247 {"custom://Authority/", "file.html", true, "custom://Authority/file.html"}, 248 {"custom://Authority/", "other://Auth/", true, "other://Auth/"}, 249 {"custom://Authority/", "../../file.html", true, "custom://Authority/file.html"}, 250 {"custom://Authority/path/", "file.html", true, "custom://Authority/path/file.html"}, 251 {"custom://Authority:NoCanon/path/", "file.html", true, "custom://Authority:NoCanon/path/file.html"}, 252 // It's still possible to get an invalid path URL. 253 {"custom://Invalid:!#Auth/", "file.html", false, ""}, 254 // A path with an authority section gets canonicalized under standard URL 255 // rules, even though the base was non-standard. 256 {"content://content.Provider/", "//other.Provider", true, "content://other.provider/"}, 257 // Resolving an absolute URL doesn't cause canonicalization of the 258 // result. 259 {"about:blank", "custom://Authority", true, "custom://Authority"}, 260 // Resolving should fail if the base URL is authority-based but is 261 // missing a path component (the '/' at the end). 262 {"scheme://Authority", "path", false, ""}, 263 // Test resolving a fragment (only) against any kind of base-URL. 264 {"about:blank", "#id42", true, "about:blank#id42" }, 265 {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag" }, 266 // A surprising side effect of allowing fragments to resolve against 267 // any URL scheme is we might break javascript: URLs by doing so... 268 {"javascript:alert('foo#bar')", "#badfrag", true, 269 "javascript:alert('foo#badfrag" }, 270 }; 271 272 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(resolve_non_standard_cases); i++) { 273 const ResolveRelativeCase& test_data = resolve_non_standard_cases[i]; 274 url_parse::Parsed base_parsed; 275 url_parse::ParsePathURL(test_data.base, strlen(test_data.base), 276 &base_parsed); 277 278 std::string resolved; 279 url_canon::StdStringCanonOutput output(&resolved); 280 url_parse::Parsed resolved_parsed; 281 bool valid = 282 url_util::ResolveRelative(test_data.base, strlen(test_data.base), 283 base_parsed, 284 test_data.rel, strlen(test_data.rel), 285 NULL, &output, &resolved_parsed); 286 output.Complete(); 287 288 EXPECT_EQ(test_data.is_valid, valid) << i; 289 if (test_data.is_valid && valid) 290 EXPECT_EQ(test_data.out, resolved) << i; 291 } 292} 293