safe_browsing_util_unittest.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include <algorithm>
6
7#include "base/sha2.h"
8#include "base/string_util.h"
9#include "chrome/browser/safe_browsing/safe_browsing_util.h"
10#include "googleurl/src/gurl.h"
11#include "testing/gtest/include/gtest/gtest.h"
12
namespace {

// Returns true if |str| compares equal to at least one element of |data|,
// and false otherwise (including when |data| is empty). The check is a
// linear scan, so callers can test membership without caring about the
// order of the elements.
bool VectorContains(const std::vector<std::string>& data,
                    const std::string& str) {
  return std::find(data.begin(), data.end(), str) != data.end();
}

}  // namespace
21
22// Tests that we generate the required host/path combinations for testing
23// according to the Safe Browsing spec.
24// See section 6.2 in
25// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
26TEST(SafeBrowsingUtilTest, UrlParsing) {
27  std::vector<std::string> hosts, paths;
28
29  GURL url("http://a.b.c/1/2.html?param=1");
30  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
31  safe_browsing_util::GeneratePathsToCheck(url, &paths);
32  EXPECT_EQ(hosts.size(), static_cast<size_t>(2));
33  EXPECT_EQ(paths.size(), static_cast<size_t>(4));
34  EXPECT_EQ(hosts[0], "b.c");
35  EXPECT_EQ(hosts[1], "a.b.c");
36
37  EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1"));
38  EXPECT_TRUE(VectorContains(paths, "/1/2.html"));
39  EXPECT_TRUE(VectorContains(paths, "/1/"));
40  EXPECT_TRUE(VectorContains(paths, "/"));
41
42  url = GURL("http://a.b.c.d.e.f.g/1.html");
43  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
44  safe_browsing_util::GeneratePathsToCheck(url, &paths);
45  EXPECT_EQ(hosts.size(), static_cast<size_t>(5));
46  EXPECT_EQ(paths.size(), static_cast<size_t>(2));
47  EXPECT_EQ(hosts[0], "f.g");
48  EXPECT_EQ(hosts[1], "e.f.g");
49  EXPECT_EQ(hosts[2], "d.e.f.g");
50  EXPECT_EQ(hosts[3], "c.d.e.f.g");
51  EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g");
52  EXPECT_TRUE(VectorContains(paths, "/1.html"));
53  EXPECT_TRUE(VectorContains(paths, "/"));
54
55  url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
56  safe_browsing_util::GeneratePathsToCheck(url, &paths);
57  EXPECT_EQ(paths.size(), static_cast<size_t>(3));
58  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/"));
59  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/"));
60  EXPECT_TRUE(VectorContains(paths, "/"));
61}
62
63// Tests the url canonicalization according to the Safe Browsing spec.
64// See section 6.1 in
65// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
66TEST(SafeBrowsingUtilTest, CanonicalizeUrl) {
67  struct {
68    const char* input_url;
69    const char* expected_canonicalized_hostname;
70    const char* expected_canonicalized_path;
71    const char* expected_canonicalized_query;
72  } tests[] = {
73    {
74      "http://host/%25%32%35",
75      "host",
76      "/%25",
77      ""
78    }, {
79      "http://host/%25%32%35%25%32%35",
80      "host",
81      "/%25%25",
82      ""
83    }, {
84      "http://host/%2525252525252525",
85      "host",
86      "/%25",
87      ""
88    }, {
89      "http://host/asdf%25%32%35asd",
90      "host",
91      "/asdf%25asd",
92      ""
93    }, {
94      "http://host/%%%25%32%35asd%%",
95      "host",
96      "/%25%25%25asd%25%25",
97      ""
98    }, {
99      "http://host/%%%25%32%35asd%%",
100      "host",
101      "/%25%25%25asd%25%25",
102      ""
103    }, {
104      "http://www.google.com/",
105      "www.google.com",
106      "/",
107      ""
108    }, {
109      "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77"
110          "%77%77%2E%65%62%61%79%2E%63%6F%6D/",
111      "168.188.99.26",
112      "/.secure/www.ebay.com/",
113      ""
114    }, {
115      "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd"
116          "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
117      "195.127.0.11",
118      "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv"
119          "alidateinfoswqpcmlx=hgplmcx/",
120      ""
121    }, {
122      "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A"
123          "22%252833%252944_55%252B",
124      "host.com",
125      "/~a!b@c%23d$e%25f^00&11*22(33)44_55+",
126      ""
127    }, {
128      "http://3279880203/blah",
129      "195.127.0.11",
130      "/blah",
131      ""
132    }, {
133      "http://www.google.com/blah/..",
134      "www.google.com",
135      "/",
136      ""
137    }, {
138      "http://www.google.com/blah#fraq",
139      "www.google.com",
140      "/blah",
141      ""
142    }, {
143      "http://www.GOOgle.com/",
144      "www.google.com",
145      "/",
146      ""
147    }, {
148      "http://www.google.com.../",
149      "www.google.com",
150      "/",
151      ""
152    }, {
153      "http://www.google.com/q?",
154      "www.google.com",
155      "/q",
156      ""
157    }, {
158      "http://www.google.com/q?r?",
159      "www.google.com",
160      "/q",
161      "r?"
162    }, {
163      "http://www.google.com/q?r?s",
164      "www.google.com",
165      "/q",
166      "r?s"
167    }, {
168      "http://evil.com/foo#bar#baz",
169      "evil.com",
170      "/foo",
171      ""
172    }, {
173      "http://evil.com/foo;",
174      "evil.com",
175      "/foo;",
176      ""
177    }, {
178      "http://evil.com/foo?bar;",
179      "evil.com",
180      "/foo",
181      "bar;"
182    }, {
183      "http://notrailingslash.com",
184      "notrailingslash.com",
185      "/",
186      ""
187    }, {
188      "http://www.gotaport.com:1234/",
189      "www.gotaport.com",
190      "/",
191      ""
192    }, {
193      "  http://www.google.com/  ",
194      "www.google.com",
195      "/",
196      ""
197    }, {
198      "http:// leadingspace.com/",
199      "%20leadingspace.com",
200      "/",
201      ""
202    }, {
203      "http://%20leadingspace.com/",
204      "%20leadingspace.com",
205      "/",
206      ""
207    }, {
208      "https://www.securesite.com/",
209      "www.securesite.com",
210      "/",
211      ""
212    }, {
213      "http://host.com/ab%23cd",
214      "host.com",
215      "/ab%23cd",
216      ""
217    }, {
218      "http://host%3e.com//twoslashes?more//slashes",
219      "host>.com",
220      "/twoslashes",
221      "more//slashes"
222    }, {
223      "http://host.com/abc?val=xyz#anything",
224      "host.com",
225      "/abc",
226      "val=xyz"
227    }, {
228      "http://abc:def@host.com/xyz",
229      "host.com",
230      "/xyz",
231      ""
232    }, {
233      "http://host%3e.com/abc/%2e%2e%2fdef",
234      "host>.com",
235      "/def",
236      ""
237    }, {
238      "http://.......host...com.....//abc/////def%2F%2F%2Fxyz",
239      "host.com",
240      "/abc/def/xyz",
241      ""
242    }, {
243      "ftp://host.com/foo?bar",
244      "host.com",
245      "/foo",
246      "bar"
247    }, {
248      "data:text/html;charset=utf-8,%0D%0A",
249      "",
250      "",
251      ""
252    }, {
253      "javascript:alert()",
254      "",
255      "",
256      ""
257    }, {
258      "mailto:abc@example.com",
259      "",
260      "",
261      ""
262    },
263  };
264  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
265    SCOPED_TRACE(StringPrintf("Test: %s", tests[i].input_url));
266    GURL url(tests[i].input_url);
267
268    std::string canonicalized_hostname;
269    std::string canonicalized_path;
270    std::string canonicalized_query;
271    safe_browsing_util::CanonicalizeUrl(url, &canonicalized_hostname,
272        &canonicalized_path, &canonicalized_query);
273
274    EXPECT_EQ(tests[i].expected_canonicalized_hostname,
275              canonicalized_hostname);
276    EXPECT_EQ(tests[i].expected_canonicalized_path,
277              canonicalized_path);
278    EXPECT_EQ(tests[i].expected_canonicalized_query,
279              canonicalized_query);
280  }
281}
282
283TEST(SafeBrowsingUtilTest, FullHashCompare) {
284  GURL url("http://www.evil.com/phish.html");
285  SBFullHashResult full_hash;
286  base::SHA256HashString(url.host() + url.path(),
287                         &full_hash.hash,
288                         sizeof(SBFullHash));
289  std::vector<SBFullHashResult> full_hashes;
290  full_hashes.push_back(full_hash);
291
292  EXPECT_EQ(safe_browsing_util::CompareFullHashes(url, full_hashes), 0);
293
294  url = GURL("http://www.evil.com/okay_path.html");
295  EXPECT_EQ(safe_browsing_util::CompareFullHashes(url, full_hashes), -1);
296}
297