1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <algorithm>
6
7#include "base/strings/stringprintf.h"
8#include "chrome/browser/safe_browsing/safe_browsing_util.h"
9#include "testing/gtest/include/gtest/gtest.h"
10#include "url/gurl.h"
11
12namespace {
13
// Returns true when |str| compares equal to at least one element of |data|,
// and false otherwise (including when |data| is empty).
bool VectorContains(const std::vector<std::string>& data,
                    const std::string& str) {
  for (std::vector<std::string>::const_iterator entry = data.begin();
       entry != data.end(); ++entry) {
    if (*entry == str)
      return true;
  }
  return false;
}
18
19// Tests that we generate the required host/path combinations for testing
20// according to the Safe Browsing spec.
21// See section 6.2 in
22// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
23TEST(SafeBrowsingUtilTest, UrlParsing) {
24  std::vector<std::string> hosts, paths;
25
26  GURL url("http://a.b.c/1/2.html?param=1");
27  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
28  safe_browsing_util::GeneratePathsToCheck(url, &paths);
29  EXPECT_EQ(hosts.size(), static_cast<size_t>(2));
30  EXPECT_EQ(paths.size(), static_cast<size_t>(4));
31  EXPECT_EQ(hosts[0], "b.c");
32  EXPECT_EQ(hosts[1], "a.b.c");
33
34  EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1"));
35  EXPECT_TRUE(VectorContains(paths, "/1/2.html"));
36  EXPECT_TRUE(VectorContains(paths, "/1/"));
37  EXPECT_TRUE(VectorContains(paths, "/"));
38
39  url = GURL("http://a.b.c.d.e.f.g/1.html");
40  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
41  safe_browsing_util::GeneratePathsToCheck(url, &paths);
42  EXPECT_EQ(hosts.size(), static_cast<size_t>(5));
43  EXPECT_EQ(paths.size(), static_cast<size_t>(2));
44  EXPECT_EQ(hosts[0], "f.g");
45  EXPECT_EQ(hosts[1], "e.f.g");
46  EXPECT_EQ(hosts[2], "d.e.f.g");
47  EXPECT_EQ(hosts[3], "c.d.e.f.g");
48  EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g");
49  EXPECT_TRUE(VectorContains(paths, "/1.html"));
50  EXPECT_TRUE(VectorContains(paths, "/"));
51
52  url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
53  safe_browsing_util::GeneratePathsToCheck(url, &paths);
54  EXPECT_EQ(paths.size(), static_cast<size_t>(3));
55  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/"));
56  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/"));
57  EXPECT_TRUE(VectorContains(paths, "/"));
58}
59
60// Tests the url canonicalization according to the Safe Browsing spec.
61// See section 6.1 in
62// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
63TEST(SafeBrowsingUtilTest, CanonicalizeUrl) {
64  struct {
65    const char* input_url;
66    const char* expected_canonicalized_hostname;
67    const char* expected_canonicalized_path;
68    const char* expected_canonicalized_query;
69  } tests[] = {
70    {
71      "http://host/%25%32%35",
72      "host",
73      "/%25",
74      ""
75    }, {
76      "http://host/%25%32%35%25%32%35",
77      "host",
78      "/%25%25",
79      ""
80    }, {
81      "http://host/%2525252525252525",
82      "host",
83      "/%25",
84      ""
85    }, {
86      "http://host/asdf%25%32%35asd",
87      "host",
88      "/asdf%25asd",
89      ""
90    }, {
91      "http://host/%%%25%32%35asd%%",
92      "host",
93      "/%25%25%25asd%25%25",
94      ""
95    }, {
96      "http://host/%%%25%32%35asd%%",
97      "host",
98      "/%25%25%25asd%25%25",
99      ""
100    }, {
101      "http://www.google.com/",
102      "www.google.com",
103      "/",
104      ""
105    }, {
106      "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77"
107          "%77%77%2E%65%62%61%79%2E%63%6F%6D/",
108      "168.188.99.26",
109      "/.secure/www.ebay.com/",
110      ""
111    }, {
112      "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd"
113          "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
114      "195.127.0.11",
115      "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv"
116          "alidateinfoswqpcmlx=hgplmcx/",
117      ""
118    }, {
119      "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A"
120          "22%252833%252944_55%252B",
121      "host.com",
122      "/~a!b@c%23d$e%25f^00&11*22(33)44_55+",
123      ""
124    }, {
125      "http://3279880203/blah",
126      "195.127.0.11",
127      "/blah",
128      ""
129    }, {
130      "http://www.google.com/blah/..",
131      "www.google.com",
132      "/",
133      ""
134    }, {
135      "http://www.google.com/blah#fraq",
136      "www.google.com",
137      "/blah",
138      ""
139    }, {
140      "http://www.GOOgle.com/",
141      "www.google.com",
142      "/",
143      ""
144    }, {
145      "http://www.google.com.../",
146      "www.google.com",
147      "/",
148      ""
149    }, {
150      "http://www.google.com/q?",
151      "www.google.com",
152      "/q",
153      ""
154    }, {
155      "http://www.google.com/q?r?",
156      "www.google.com",
157      "/q",
158      "r?"
159    }, {
160      "http://www.google.com/q?r?s",
161      "www.google.com",
162      "/q",
163      "r?s"
164    }, {
165      "http://evil.com/foo#bar#baz",
166      "evil.com",
167      "/foo",
168      ""
169    }, {
170      "http://evil.com/foo;",
171      "evil.com",
172      "/foo;",
173      ""
174    }, {
175      "http://evil.com/foo?bar;",
176      "evil.com",
177      "/foo",
178      "bar;"
179    }, {
180      "http://notrailingslash.com",
181      "notrailingslash.com",
182      "/",
183      ""
184    }, {
185      "http://www.gotaport.com:1234/",
186      "www.gotaport.com",
187      "/",
188      ""
189    }, {
190      "  http://www.google.com/  ",
191      "www.google.com",
192      "/",
193      ""
194    }, {
195      "http:// leadingspace.com/",
196      "%20leadingspace.com",
197      "/",
198      ""
199    }, {
200      "http://%20leadingspace.com/",
201      "%20leadingspace.com",
202      "/",
203      ""
204    }, {
205      "https://www.securesite.com/",
206      "www.securesite.com",
207      "/",
208      ""
209    }, {
210      "http://host.com/ab%23cd",
211      "host.com",
212      "/ab%23cd",
213      ""
214    }, {
215      "http://host%3e.com//twoslashes?more//slashes",
216      "host>.com",
217      "/twoslashes",
218      "more//slashes"
219    }, {
220      "http://host.com/abc?val=xyz#anything",
221      "host.com",
222      "/abc",
223      "val=xyz"
224    }, {
225      "http://abc:def@host.com/xyz",
226      "host.com",
227      "/xyz",
228      ""
229    }, {
230      "http://host%3e.com/abc/%2e%2e%2fdef",
231      "host>.com",
232      "/def",
233      ""
234    }, {
235      "http://.......host...com.....//abc/////def%2F%2F%2Fxyz",
236      "host.com",
237      "/abc/def/xyz",
238      ""
239    }, {
240      "ftp://host.com/foo?bar",
241      "host.com",
242      "/foo",
243      "bar"
244    }, {
245      "data:text/html;charset=utf-8,%0D%0A",
246      "",
247      "",
248      ""
249    }, {
250      "javascript:alert()",
251      "",
252      "",
253      ""
254    }, {
255      "mailto:abc@example.com",
256      "",
257      "",
258      ""
259    },
260  };
261  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
262    SCOPED_TRACE(base::StringPrintf("Test: %s", tests[i].input_url));
263    GURL url(tests[i].input_url);
264
265    std::string canonicalized_hostname;
266    std::string canonicalized_path;
267    std::string canonicalized_query;
268    safe_browsing_util::CanonicalizeUrl(url, &canonicalized_hostname,
269        &canonicalized_path, &canonicalized_query);
270
271    EXPECT_EQ(tests[i].expected_canonicalized_hostname,
272              canonicalized_hostname);
273    EXPECT_EQ(tests[i].expected_canonicalized_path,
274              canonicalized_path);
275    EXPECT_EQ(tests[i].expected_canonicalized_query,
276              canonicalized_query);
277  }
278}
279
280TEST(SafeBrowsingUtilTest, ListIdListNameConversion) {
281  std::string list_name;
282  EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID,
283                                               &list_name));
284  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE,
285                                              &list_name));
286  EXPECT_EQ(list_name, std::string(safe_browsing_util::kMalwareList));
287  EXPECT_EQ(safe_browsing_util::MALWARE,
288            safe_browsing_util::GetListId(list_name));
289
290  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH,
291                                              &list_name));
292  EXPECT_EQ(list_name, std::string(safe_browsing_util::kPhishingList));
293  EXPECT_EQ(safe_browsing_util::PHISH,
294            safe_browsing_util::GetListId(list_name));
295
296  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL,
297                                              &list_name));
298  EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinUrlList));
299  EXPECT_EQ(safe_browsing_util::BINURL,
300            safe_browsing_util::GetListId(list_name));
301}
302
303// Since the ids are saved in file, we need to make sure they don't change.
304// Since only the last bit of each id is saved in file together with
305// chunkids, this checks only last bit.
306TEST(SafeBrowsingUtilTest, ListIdVerification) {
307  EXPECT_EQ(0, safe_browsing_util::MALWARE % 2);
308  EXPECT_EQ(1, safe_browsing_util::PHISH % 2);
309  EXPECT_EQ(0, safe_browsing_util::BINURL %2);
310}
311
312TEST(SafeBrowsingUtilTest, StringToSBFullHashAndSBFullHashToString) {
313  // 31 chars plus the last \0 as full_hash.
314  const std::string hash_in = "12345678902234567890323456789012";
315  SBFullHash hash_out = safe_browsing_util::StringToSBFullHash(hash_in);
316  EXPECT_EQ(0x34333231U, hash_out.prefix);
317  EXPECT_EQ(0, memcmp(hash_in.data(), hash_out.full_hash, sizeof(SBFullHash)));
318
319  std::string hash_final = safe_browsing_util::SBFullHashToString(hash_out);
320  EXPECT_EQ(hash_in, hash_final);
321}
322
323TEST(SafeBrowsingUtilTest, FullHashOperators) {
324  const SBFullHash kHash1 = SBFullHashForString("one");
325  const SBFullHash kHash2 = SBFullHashForString("two");
326
327  EXPECT_TRUE(SBFullHashEqual(kHash1, kHash1));
328  EXPECT_TRUE(SBFullHashEqual(kHash2, kHash2));
329  EXPECT_FALSE(SBFullHashEqual(kHash1, kHash2));
330  EXPECT_FALSE(SBFullHashEqual(kHash2, kHash1));
331
332  EXPECT_FALSE(SBFullHashLess(kHash1, kHash2));
333  EXPECT_TRUE(SBFullHashLess(kHash2, kHash1));
334
335  EXPECT_FALSE(SBFullHashLess(kHash1, kHash1));
336  EXPECT_FALSE(SBFullHashLess(kHash2, kHash2));
337}
338
339}  // namespace
340