1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "testing/gtest/include/gtest/gtest.h"
6#include "url/url_canon.h"
7#include "url/url_canon_stdstring.h"
8#include "url/url_parse.h"
9#include "url/url_test_utils.h"
10#include "url/url_util.h"
11
12TEST(URLUtilTest, FindAndCompareScheme) {
13  url_parse::Component found_scheme;
14
15  // Simple case where the scheme is found and matches.
16  const char kStr1[] = "http://www.com/";
17  EXPECT_TRUE(url_util::FindAndCompareScheme(
18      kStr1, static_cast<int>(strlen(kStr1)), "http", NULL));
19  EXPECT_TRUE(url_util::FindAndCompareScheme(
20      kStr1, static_cast<int>(strlen(kStr1)), "http", &found_scheme));
21  EXPECT_TRUE(found_scheme == url_parse::Component(0, 4));
22
23  // A case where the scheme is found and doesn't match.
24  EXPECT_FALSE(url_util::FindAndCompareScheme(
25      kStr1, static_cast<int>(strlen(kStr1)), "https", &found_scheme));
26  EXPECT_TRUE(found_scheme == url_parse::Component(0, 4));
27
28  // A case where there is no scheme.
29  const char kStr2[] = "httpfoobar";
30  EXPECT_FALSE(url_util::FindAndCompareScheme(
31      kStr2, static_cast<int>(strlen(kStr2)), "http", &found_scheme));
32  EXPECT_TRUE(found_scheme == url_parse::Component());
33
34  // When there is an empty scheme, it should match the empty scheme.
35  const char kStr3[] = ":foo.com/";
36  EXPECT_TRUE(url_util::FindAndCompareScheme(
37      kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme));
38  EXPECT_TRUE(found_scheme == url_parse::Component(0, 0));
39
40  // But when there is no scheme, it should fail.
41  EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme));
42  EXPECT_TRUE(found_scheme == url_parse::Component());
43
44  // When there is a whitespace char in scheme, it should canonicalize the url
45  // before comparison.
46  const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
47  EXPECT_TRUE(url_util::FindAndCompareScheme(
48      whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript",
49      &found_scheme));
50  EXPECT_TRUE(found_scheme == url_parse::Component(1, 10));
51
52  // Control characters should be stripped out on the ends, and kept in the
53  // middle.
54  const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)";
55  EXPECT_FALSE(url_util::FindAndCompareScheme(
56      ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript",
57      &found_scheme));
58  EXPECT_TRUE(found_scheme == url_parse::Component(1, 11));
59}
60
61TEST(URLUtilTest, ReplaceComponents) {
62  url_parse::Parsed parsed;
63  url_canon::RawCanonOutputT<char> output;
64  url_parse::Parsed new_parsed;
65
66  // Check that the following calls do not cause crash
67  url_canon::Replacements<char> replacements;
68  replacements.SetRef("test", url_parse::Component(0, 4));
69  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
70                              &new_parsed);
71  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
72                              &new_parsed);
73  replacements.ClearRef();
74  replacements.SetHost("test", url_parse::Component(0, 4));
75  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
76                              &new_parsed);
77  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
78                              &new_parsed);
79
80  replacements.ClearHost();
81  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
82                              &new_parsed);
83  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
84                              &new_parsed);
85  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
86                              &new_parsed);
87  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
88                              &new_parsed);
89}
90
91static std::string CheckReplaceScheme(const char* base_url,
92                                      const char* scheme) {
93  // Make sure the input is canonicalized.
94  url_canon::RawCanonOutput<32> original;
95  url_parse::Parsed original_parsed;
96  url_util::Canonicalize(base_url, strlen(base_url), NULL,
97                         &original, &original_parsed);
98
99  url_canon::Replacements<char> replacements;
100  replacements.SetScheme(scheme, url_parse::Component(0, strlen(scheme)));
101
102  std::string output_string;
103  url_canon::StdStringCanonOutput output(&output_string);
104  url_parse::Parsed output_parsed;
105  url_util::ReplaceComponents(original.data(), original.length(),
106                              original_parsed, replacements, NULL,
107                              &output, &output_parsed);
108
109  output.Complete();
110  return output_string;
111}
112
113TEST(URLUtilTest, ReplaceScheme) {
114  EXPECT_EQ("https://google.com/",
115            CheckReplaceScheme("http://google.com/", "https"));
116  EXPECT_EQ("file://google.com/",
117            CheckReplaceScheme("http://google.com/", "file"));
118  EXPECT_EQ("http://home/Build",
119            CheckReplaceScheme("file:///Home/Build", "http"));
120  EXPECT_EQ("javascript:foo",
121            CheckReplaceScheme("about:foo", "javascript"));
122  EXPECT_EQ("://google.com/",
123            CheckReplaceScheme("http://google.com/", ""));
124  EXPECT_EQ("http://google.com/",
125            CheckReplaceScheme("about:google.com", "http"));
126  EXPECT_EQ("http:", CheckReplaceScheme("", "http"));
127
128#ifdef WIN32
129  // Magic Windows drive letter behavior when converting to a file URL.
130  EXPECT_EQ("file:///E:/foo/",
131            CheckReplaceScheme("http://localhost/e:foo/", "file"));
132#endif
133
134  // This will probably change to "about://google.com/" when we fix
135  // http://crbug.com/160 which should also be an acceptable result.
136  EXPECT_EQ("about://google.com/",
137            CheckReplaceScheme("http://google.com/", "about"));
138}
139
140TEST(URLUtilTest, DecodeURLEscapeSequences) {
141  struct DecodeCase {
142    const char* input;
143    const char* output;
144  } decode_cases[] = {
145    {"hello, world", "hello, world"},
146    {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/",
147     "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"},
148    {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/",
149     "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"},
150    {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/",
151     " !\"#$%&'()*+,-.//"},
152    {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/",
153     "0123456789:;<=>?/"},
154    {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/",
155     "@ABCDEFGHIJKLMNO/"},
156    {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/",
157     "PQRSTUVWXYZ[\\]^_/"},
158    {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/",
159     "`abcdefghijklmno/"},
160    {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
161     "pqrstuvwxyz{|}~\x7f/"},
162    // Test un-UTF-8-ization.
163    {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
164  };
165
166  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) {
167    const char* input = decode_cases[i].input;
168    url_canon::RawCanonOutputT<base::char16> output;
169    url_util::DecodeURLEscapeSequences(input, strlen(input), &output);
170    EXPECT_EQ(decode_cases[i].output,
171              url_test_utils::ConvertUTF16ToUTF8(
172                base::string16(output.data(), output.length())));
173  }
174
175  // Our decode should decode %00
176  const char zero_input[] = "%00";
177  url_canon::RawCanonOutputT<base::char16> zero_output;
178  url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input),
179                                     &zero_output);
180  EXPECT_NE("%00",
181            url_test_utils::ConvertUTF16ToUTF8(
182              base::string16(zero_output.data(), zero_output.length())));
183
184  // Test the error behavior for invalid UTF-8.
185  const char invalid_input[] = "%e4%a0%e5%a5%bd";
186  const base::char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0};
187  url_canon::RawCanonOutputT<base::char16> invalid_output;
188  url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input),
189                                     &invalid_output);
190  EXPECT_EQ(base::string16(invalid_expected),
191            base::string16(invalid_output.data(), invalid_output.length()));
192}
193
194TEST(URLUtilTest, TestEncodeURIComponent) {
195  struct EncodeCase {
196    const char* input;
197    const char* output;
198  } encode_cases[] = {
199    {"hello, world", "hello%2C%20world"},
200    {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
201     "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"},
202    {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
203     "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"},
204    {" !\"#$%&'()*+,-./",
205     "%20!%22%23%24%25%26'()*%2B%2C-.%2F"},
206    {"0123456789:;<=>?",
207     "0123456789%3A%3B%3C%3D%3E%3F"},
208    {"@ABCDEFGHIJKLMNO",
209     "%40ABCDEFGHIJKLMNO"},
210    {"PQRSTUVWXYZ[\\]^_",
211     "PQRSTUVWXYZ%5B%5C%5D%5E_"},
212    {"`abcdefghijklmno",
213     "%60abcdefghijklmno"},
214    {"pqrstuvwxyz{|}~\x7f",
215     "pqrstuvwxyz%7B%7C%7D~%7F"},
216  };
217
218  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(encode_cases); i++) {
219    const char* input = encode_cases[i].input;
220    url_canon::RawCanonOutputT<char> buffer;
221    url_util::EncodeURIComponent(input, strlen(input), &buffer);
222    std::string output(buffer.data(), buffer.length());
223    EXPECT_EQ(encode_cases[i].output, output);
224  }
225}
226
227TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
228  // This tests non-standard (in the sense that GURL::IsStandard() == false)
229  // hierarchical schemes.
230  struct ResolveRelativeCase {
231    const char* base;
232    const char* rel;
233    bool is_valid;
234    const char* out;
235  } resolve_non_standard_cases[] = {
236      // Resolving a relative path against a non-hierarchical URL should fail.
237    {"scheme:opaque_data", "/path", false, ""},
238      // Resolving a relative path against a non-standard authority-based base
239      // URL doesn't alter the authority section.
240    {"scheme://Authority/", "../path", true, "scheme://Authority/path"},
241      // A non-standard hierarchical base is resolved with path URL
242      // canoncialization rules.
243    {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"},
244    {"data:/Path/../part/part2", "file.html", true, "data:/Path/../part/file.html"},
245      // Path URL canonicalization rules also apply to non-standard authority-
246      // based URLs.
247    {"custom://Authority/", "file.html", true, "custom://Authority/file.html"},
248    {"custom://Authority/", "other://Auth/", true, "other://Auth/"},
249    {"custom://Authority/", "../../file.html", true, "custom://Authority/file.html"},
250    {"custom://Authority/path/", "file.html", true, "custom://Authority/path/file.html"},
251    {"custom://Authority:NoCanon/path/", "file.html", true, "custom://Authority:NoCanon/path/file.html"},
252      // It's still possible to get an invalid path URL.
253    {"custom://Invalid:!#Auth/", "file.html", false, ""},
254      // A path with an authority section gets canonicalized under standard URL
255      // rules, even though the base was non-standard.
256    {"content://content.Provider/", "//other.Provider", true, "content://other.provider/"},
257      // Resolving an absolute URL doesn't cause canonicalization of the
258      // result.
259    {"about:blank", "custom://Authority", true, "custom://Authority"},
260      // Resolving should fail if the base URL is authority-based but is
261      // missing a path component (the '/' at the end).
262    {"scheme://Authority", "path", false, ""},
263      // Test resolving a fragment (only) against any kind of base-URL.
264    {"about:blank", "#id42", true, "about:blank#id42" },
265    {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag" },
266      // A surprising side effect of allowing fragments to resolve against
267      // any URL scheme is we might break javascript: URLs by doing so...
268    {"javascript:alert('foo#bar')", "#badfrag", true,
269      "javascript:alert('foo#badfrag" },
270  };
271
272  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(resolve_non_standard_cases); i++) {
273    const ResolveRelativeCase& test_data = resolve_non_standard_cases[i];
274    url_parse::Parsed base_parsed;
275    url_parse::ParsePathURL(test_data.base, strlen(test_data.base),
276                            &base_parsed);
277
278    std::string resolved;
279    url_canon::StdStringCanonOutput output(&resolved);
280    url_parse::Parsed resolved_parsed;
281    bool valid =
282        url_util::ResolveRelative(test_data.base, strlen(test_data.base),
283                                  base_parsed,
284                                  test_data.rel, strlen(test_data.rel),
285                                  NULL, &output, &resolved_parsed);
286    output.Complete();
287
288    EXPECT_EQ(test_data.is_valid, valid) << i;
289    if (test_data.is_valid && valid)
290      EXPECT_EQ(test_data.out, resolved) << i;
291  }
292}
293