gurl_unittest.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright 2007 Google Inc. All Rights Reserved.
2// Author: brettw@google.com (Brett Wilson)
3
4#include "googleurl/src/gurl.h"
5#include "googleurl/src/url_canon.h"
6#include "googleurl/src/url_test_utils.h"
7#include "testing/gtest/include/gtest/gtest.h"
8
9// Some implementations of base/basictypes.h may define ARRAYSIZE.
10// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
11// which is in our version of basictypes.h.
12#ifndef ARRAYSIZE
13#define ARRAYSIZE ARRAYSIZE_UNSAFE
14#endif
15
16using url_test_utils::WStringToUTF16;
17using url_test_utils::ConvertUTF8ToUTF16;
18
19namespace {
20
21template<typename CHAR>
22void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*,
23                          const url_parse::Component&),
24                      url_canon::Replacements<CHAR>* replacements,
25                      const CHAR* str) {
26  if (str) {
27    url_parse::Component comp;
28    if (str[0])
29      comp.len = static_cast<int>(strlen(str));
30    (replacements->*func)(str, comp);
31  }
32}
33
34// Returns the canonicalized string for the given URL string for the
35// GURLTest.Types test.
36std::string TypesTestCase(const char* src) {
37  GURL gurl(src);
38  return gurl.possibly_invalid_spec();
39}
40
41}  // namespace
42
43// Different types of URLs should be handled differently by url_util, and
44// handed off to different canonicalizers.
45TEST(GURLTest, Types) {
46  // URLs with unknown schemes should be treated as path URLs, even when they
47  // have things like "://".
48  EXPECT_EQ("something:///HOSTNAME.com/",
49            TypesTestCase("something:///HOSTNAME.com/"));
50
51  // In the reverse, known schemes should always trigger standard URL handling.
52  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
53  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
54  EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
55  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
56
57#ifdef WIN32
58  // URLs that look like absolute Windows drive specs.
59  EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
60  EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
61  EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
62  EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
63#endif
64}
65
66// Test the basic creation and querying of components in a GURL. We assume
67// the parser is already tested and works, so we are mostly interested if the
68// object does the right thing with the results.
69TEST(GURLTest, Components) {
70  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
71  EXPECT_TRUE(url.is_valid());
72  EXPECT_TRUE(url.SchemeIs("http"));
73  EXPECT_FALSE(url.SchemeIsFile());
74
75  // This is the narrow version of the URL, which should match the wide input.
76  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());
77
78  EXPECT_EQ("http", url.scheme());
79  EXPECT_EQ("user", url.username());
80  EXPECT_EQ("pass", url.password());
81  EXPECT_EQ("google.com", url.host());
82  EXPECT_EQ("99", url.port());
83  EXPECT_EQ(99, url.IntPort());
84  EXPECT_EQ("/foo;bar", url.path());
85  EXPECT_EQ("q=a", url.query());
86  EXPECT_EQ("ref", url.ref());
87}
88
89TEST(GURLTest, Empty) {
90  GURL url;
91  EXPECT_FALSE(url.is_valid());
92  EXPECT_EQ("", url.spec());
93
94  EXPECT_EQ("", url.scheme());
95  EXPECT_EQ("", url.username());
96  EXPECT_EQ("", url.password());
97  EXPECT_EQ("", url.host());
98  EXPECT_EQ("", url.port());
99  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort());
100  EXPECT_EQ("", url.path());
101  EXPECT_EQ("", url.query());
102  EXPECT_EQ("", url.ref());
103}
104
105TEST(GURLTest, Copy) {
106  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
107
108  GURL url2(url);
109  EXPECT_TRUE(url2.is_valid());
110
111  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
112  EXPECT_EQ("http", url2.scheme());
113  EXPECT_EQ("user", url2.username());
114  EXPECT_EQ("pass", url2.password());
115  EXPECT_EQ("google.com", url2.host());
116  EXPECT_EQ("99", url2.port());
117  EXPECT_EQ(99, url2.IntPort());
118  EXPECT_EQ("/foo;bar", url2.path());
119  EXPECT_EQ("q=a", url2.query());
120  EXPECT_EQ("ref", url2.ref());
121
122  // Copying of invalid URL should be invalid
123  GURL invalid;
124  GURL invalid2(invalid);
125  EXPECT_FALSE(invalid2.is_valid());
126  EXPECT_EQ("", invalid2.spec());
127  EXPECT_EQ("", invalid2.scheme());
128  EXPECT_EQ("", invalid2.username());
129  EXPECT_EQ("", invalid2.password());
130  EXPECT_EQ("", invalid2.host());
131  EXPECT_EQ("", invalid2.port());
132  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort());
133  EXPECT_EQ("", invalid2.path());
134  EXPECT_EQ("", invalid2.query());
135  EXPECT_EQ("", invalid2.ref());
136}
137
138// Given an invalid URL, we should still get most of the components.
139TEST(GURLTest, Invalid) {
140  GURL url("http:google.com:foo");
141  EXPECT_FALSE(url.is_valid());
142  EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
143
144  EXPECT_EQ("http", url.scheme());
145  EXPECT_EQ("", url.username());
146  EXPECT_EQ("", url.password());
147  EXPECT_EQ("google.com", url.host());
148  EXPECT_EQ("foo", url.port());
149  EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort());
150  EXPECT_EQ("/", url.path());
151  EXPECT_EQ("", url.query());
152  EXPECT_EQ("", url.ref());
153}
154
155TEST(GURLTest, Resolve) {
156  // The tricky cases for relative URL resolving are tested in the
157  // canonicalizer unit test. Here, we just test that the GURL integration
158  // works properly.
159  struct ResolveCase {
160    const char* base;
161    const char* relative;
162    bool expected_valid;
163    const char* expected;
164  } resolve_cases[] = {
165    {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
166    {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
167    {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
168    {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
169    {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
170      // Unknown schemes are not standard.
171    {"data:blahblah", "http://google.com/", true, "http://google.com/"},
172    {"data:blahblah", "http:google.com", true, "http://google.com/"},
173    {"data:/blahblah", "file.html", false, ""},
174  };
175
176  for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) {
177    // 8-bit code path.
178    GURL input(resolve_cases[i].base);
179    GURL output = input.Resolve(resolve_cases[i].relative);
180    EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
181    EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
182
183    // Wide code path.
184    GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
185    GURL outputw =
186        input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
187    EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
188    EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
189  }
190}
191
192TEST(GURLTest, GetOrigin) {
193  struct TestCase {
194    const char* input;
195    const char* expected;
196  } cases[] = {
197    {"http://www.google.com", "http://www.google.com/"},
198    {"javascript:window.alert(\"hello,world\");", ""},
199    {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"},
200    {"http://user@www.google.com", "http://www.google.com/"},
201    {"http://:pass@www.google.com", "http://www.google.com/"},
202    {"http://:@www.google.com", "http://www.google.com/"},
203  };
204  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
205    GURL url(cases[i].input);
206    GURL origin = url.GetOrigin();
207    EXPECT_EQ(cases[i].expected, origin.spec());
208  }
209}
210
211TEST(GURLTest, GetWithEmptyPath) {
212  struct TestCase {
213    const char* input;
214    const char* expected;
215  } cases[] = {
216    {"http://www.google.com", "http://www.google.com/"},
217    {"javascript:window.alert(\"hello, world\");", ""},
218    {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
219  };
220
221  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
222    GURL url(cases[i].input);
223    GURL empty_path = url.GetWithEmptyPath();
224    EXPECT_EQ(cases[i].expected, empty_path.spec());
225  }
226}
227
228TEST(GURLTest, Replacements) {
229  // The url canonicalizer replacement test will handle most of these case.
230  // The most important thing to do here is to check that the proper
231  // canonicalizer gets called based on the scheme of the input.
232  struct ReplaceCase {
233    const char* base;
234    const char* scheme;
235    const char* username;
236    const char* password;
237    const char* host;
238    const char* port;
239    const char* path;
240    const char* query;
241    const char* ref;
242    const char* expected;
243  } replace_cases[] = {
244    {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
245    {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
246    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
247#ifdef WIN32
248    {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
249#endif
250  };
251
252  for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) {
253    const ReplaceCase& cur = replace_cases[i];
254    GURL url(cur.base);
255    GURL::Replacements repl;
256    SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme);
257    SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username);
258    SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password);
259    SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host);
260    SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port);
261    SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path);
262    SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query);
263    SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref);
264    GURL output = url.ReplaceComponents(repl);
265
266    EXPECT_EQ(replace_cases[i].expected, output.spec());
267  }
268}
269
270TEST(GURLTest, PathForRequest) {
271  struct TestCase {
272    const char* input;
273    const char* expected;
274  } cases[] = {
275    {"http://www.google.com", "/"},
276    {"http://www.google.com/", "/"},
277    {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22"},
278    {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html"},
279    {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query"},
280  };
281
282  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
283    GURL url(cases[i].input);
284    std::string path_request = url.PathForRequest();
285    EXPECT_EQ(cases[i].expected, path_request);
286  }
287}
288
289TEST(GURLTest, EffectiveIntPort) {
290  struct PortTest {
291    const char* spec;
292    int expected_int_port;
293  } port_tests[] = {
294    // http
295    {"http://www.google.com/", 80},
296    {"http://www.google.com:80/", 80},
297    {"http://www.google.com:443/", 443},
298
299    // https
300    {"https://www.google.com/", 443},
301    {"https://www.google.com:443/", 443},
302    {"https://www.google.com:80/", 80},
303
304    // ftp
305    {"ftp://www.google.com/", 21},
306    {"ftp://www.google.com:21/", 21},
307    {"ftp://www.google.com:80/", 80},
308
309    // gopher
310    {"gopher://www.google.com/", 70},
311    {"gopher://www.google.com:70/", 70},
312    {"gopher://www.google.com:80/", 80},
313
314    // file - no port
315    {"file://www.google.com/", url_parse::PORT_UNSPECIFIED},
316    {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED},
317
318    // data - no port
319    {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED},
320    {"data:www.google.com", url_parse::PORT_UNSPECIFIED},
321  };
322
323  for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) {
324    GURL url(port_tests[i].spec);
325    EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
326  }
327}
328
329TEST(GURLTest, IPAddress) {
330  struct IPTest {
331    const char* spec;
332    bool expected_ip;
333  } ip_tests[] = {
334    {"http://www.google.com/", false},
335    {"http://192.168.9.1/", true},
336    {"http://192.168.9.1.2/", false},
337    {"http://192.168.m.1/", false},
338    {"http://2001:db8::1/", false},
339    {"http://[2001:db8::1]/", true},
340    {"", false},
341    {"some random input!", false},
342  };
343
344  for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) {
345    GURL url(ip_tests[i].spec);
346    EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
347  }
348}
349
350TEST(GURLTest, HostNoBrackets) {
351  struct TestCase {
352    const char* input;
353    const char* expected_host;
354    const char* expected_plainhost;
355  } cases[] = {
356    {"http://www.google.com", "www.google.com", "www.google.com"},
357    {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
358    {"http://[::]/", "[::]", "::"},
359
360    // Don't require a valid URL, but don't crash either.
361    {"http://[]/", "[]", ""},
362    {"http://[x]/", "[x]", "x"},
363    {"http://[x/", "[x", "[x"},
364    {"http://x]/", "x]", "x]"},
365    {"http://[/", "[", "["},
366    {"http://]/", "]", "]"},
367    {"", "", ""},
368  };
369  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
370    GURL url(cases[i].input);
371    EXPECT_EQ(cases[i].expected_host, url.host());
372    EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
373  }
374}
375
376TEST(GURLTest, DomainIs) {
377  const char google_domain[] = "google.com";
378
379  GURL url_1("http://www.google.com:99/foo");
380  EXPECT_TRUE(url_1.DomainIs(google_domain));
381
382  GURL url_2("http://google.com:99/foo");
383  EXPECT_TRUE(url_2.DomainIs(google_domain));
384
385  GURL url_3("http://google.com./foo");
386  EXPECT_TRUE(url_3.DomainIs(google_domain));
387
388  GURL url_4("http://google.com/foo");
389  EXPECT_FALSE(url_4.DomainIs("google.com."));
390
391  GURL url_5("http://google.com./foo");
392  EXPECT_TRUE(url_5.DomainIs("google.com."));
393
394  GURL url_6("http://www.google.com./foo");
395  EXPECT_TRUE(url_6.DomainIs(".com."));
396
397  GURL url_7("http://www.balabala.com/foo");
398  EXPECT_FALSE(url_7.DomainIs(google_domain));
399
400  GURL url_8("http://www.google.com.cn/foo");
401  EXPECT_FALSE(url_8.DomainIs(google_domain));
402
403  GURL url_9("http://www.iamnotgoogle.com/foo");
404  EXPECT_FALSE(url_9.DomainIs(google_domain));
405
406  GURL url_10("http://www.iamnotgoogle.com../foo");
407  EXPECT_FALSE(url_10.DomainIs(".com"));
408}
409
410// Newlines should be stripped from inputs.
411TEST(GURLTest, Newlines) {
412  // Constructor.
413  GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
414  EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
415
416  // Relative path resolver.
417  GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
418  EXPECT_EQ("http://www.google.com/foo", url_2.spec());
419
420  // Note that newlines are NOT stripped from ReplaceComponents.
421}
422
423TEST(GURLTest, IsStandard) {
424  GURL a("http:foo/bar");
425  EXPECT_TRUE(a.IsStandard());
426
427  GURL b("foo:bar/baz");
428  EXPECT_FALSE(b.IsStandard());
429
430  GURL c("foo://bar/baz");
431  EXPECT_FALSE(c.IsStandard());
432}
433