// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <algorithm>
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h"
8ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "crypto/sha2.h"
9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/safe_browsing/safe_browsing_util.h"
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "googleurl/src/gurl.h"
11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "testing/gtest/include/gtest/gtest.h"
12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
namespace {

// Returns true when |str| compares equal to at least one element of |data|,
// and false otherwise (including when |data| is empty).
bool VectorContains(const std::vector<std::string>& data,
                    const std::string& str) {
  const std::vector<std::string>::const_iterator last = data.end();
  const std::vector<std::string>::const_iterator match =
      std::find(data.begin(), last, str);
  return match != last;
}

}  // namespace
21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Tests that we generate the required host/path combinations for testing
23c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// according to the Safe Browsing spec.
24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// See section 6.2 in
25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
26c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(SafeBrowsingUtilTest, UrlParsing) {
27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::vector<std::string> hosts, paths;
28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GURL url("http://a.b.c/1/2.html?param=1");
30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  safe_browsing_util::GeneratePathsToCheck(url, &paths);
32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts.size(), static_cast<size_t>(2));
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(paths.size(), static_cast<size_t>(4));
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts[0], "b.c");
35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts[1], "a.b.c");
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1"));
38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/1/2.html"));
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/1/"));
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/"));
41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url = GURL("http://a.b.c.d.e.f.g/1.html");
43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  safe_browsing_util::GeneratePathsToCheck(url, &paths);
45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts.size(), static_cast<size_t>(5));
46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(paths.size(), static_cast<size_t>(2));
47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts[0], "f.g");
48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts[1], "e.f.g");
49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts[2], "d.e.f.g");
50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts[3], "c.d.e.f.g");
51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g");
52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/1.html"));
53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/"));
54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  safe_browsing_util::GeneratePathsToCheck(url, &paths);
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(paths.size(), static_cast<size_t>(3));
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/"));
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/"));
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_TRUE(VectorContains(paths, "/"));
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Tests the url canonicalization according to the Safe Browsing spec.
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// See section 6.1 in
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(SafeBrowsingUtilTest, CanonicalizeUrl) {
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  struct {
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char* input_url;
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char* expected_canonicalized_hostname;
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char* expected_canonicalized_path;
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const char* expected_canonicalized_query;
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } tests[] = {
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    {
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host/%25%32%35",
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host",
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/%25",
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host/%25%32%35%25%32%35",
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host",
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/%25%25",
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host/%2525252525252525",
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host",
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/%25",
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host/asdf%25%32%35asd",
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host",
91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/asdf%25asd",
92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host/%%%25%32%35asd%%",
95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host",
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/%25%25%25asd%25%25",
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host/%%%25%32%35asd%%",
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host",
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/%25%25%25asd%25%25",
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.google.com/",
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77"
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          "%77%77%2E%65%62%61%79%2E%63%6F%6D/",
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "168.188.99.26",
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/.secure/www.ebay.com/",
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd"
116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "195.127.0.11",
118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv"
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          "alidateinfoswqpcmlx=hgplmcx/",
120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A"
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          "22%252833%252944_55%252B",
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host.com",
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/~a!b@c%23d$e%25f^00&11*22(33)44_55+",
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://3279880203/blah",
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "195.127.0.11",
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/blah",
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.google.com/blah/..",
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.google.com/blah#fraq",
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/blah",
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.GOOgle.com/",
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.google.com.../",
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.google.com/q?",
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/q",
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.google.com/q?r?",
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/q",
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "r?"
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.google.com/q?r?s",
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/q",
166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "r?s"
167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://evil.com/foo#bar#baz",
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "evil.com",
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/foo",
171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://evil.com/foo;",
174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "evil.com",
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/foo;",
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://evil.com/foo?bar;",
179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "evil.com",
180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/foo",
181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "bar;"
182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://notrailingslash.com",
184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "notrailingslash.com",
185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://www.gotaport.com:1234/",
189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.gotaport.com",
190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "  http://www.google.com/  ",
194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.google.com",
195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http:// leadingspace.com/",
199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "%20leadingspace.com",
200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://%20leadingspace.com/",
204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "%20leadingspace.com",
205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "https://www.securesite.com/",
209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "www.securesite.com",
210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/",
211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host.com/ab%23cd",
214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host.com",
215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/ab%23cd",
216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host%3e.com//twoslashes?more//slashes",
219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host>.com",
220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/twoslashes",
221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "more//slashes"
222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host.com/abc?val=xyz#anything",
224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host.com",
225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/abc",
226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "val=xyz"
227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://abc:def@host.com/xyz",
229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host.com",
230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/xyz",
231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://host%3e.com/abc/%2e%2e%2fdef",
234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host>.com",
235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/def",
236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "http://.......host...com.....//abc/////def%2F%2F%2Fxyz",
239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host.com",
240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/abc/def/xyz",
241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "ftp://host.com/foo?bar",
244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "host.com",
245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "/foo",
246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "bar"
247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "data:text/html;charset=utf-8,%0D%0A",
249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "",
250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "",
251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "javascript:alert()",
254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "",
255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "",
256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }, {
258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "mailto:abc@example.com",
259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "",
260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "",
261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ""
262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    },
263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  };
264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    SCOPED_TRACE(StringPrintf("Test: %s", tests[i].input_url));
266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    GURL url(tests[i].input_url);
267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::string canonicalized_hostname;
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::string canonicalized_path;
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::string canonicalized_query;
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    safe_browsing_util::CanonicalizeUrl(url, &canonicalized_hostname,
272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        &canonicalized_path, &canonicalized_query);
273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(tests[i].expected_canonicalized_hostname,
275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch              canonicalized_hostname);
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(tests[i].expected_canonicalized_path,
277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch              canonicalized_path);
278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(tests[i].expected_canonicalized_query,
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch              canonicalized_query);
280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
283dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenTEST(SafeBrowsingUtilTest, GetUrlHashIndex) {
284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GURL url("http://www.evil.com/phish.html");
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  SBFullHashResult full_hash;
286ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  crypto::SHA256HashString(url.host() + url.path(),
287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                         &full_hash.hash,
288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                         sizeof(SBFullHash));
289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::vector<SBFullHashResult> full_hashes;
290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  full_hashes.push_back(full_hash);
291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
292dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url, full_hashes), 0);
293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url = GURL("http://www.evil.com/okay_path.html");
295dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url, full_hashes), -1);
296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
29721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
29821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian MonsenTEST(SafeBrowsingUtilTest, ListIdListNameConversion) {
29921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  std::string list_name;
30021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID,
30121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen                                               &list_name));
30221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE,
30321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen                                              &list_name));
30421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(list_name, std::string(safe_browsing_util::kMalwareList));
30521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(safe_browsing_util::MALWARE,
30621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen            safe_browsing_util::GetListId(list_name));
30721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
30821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH,
30921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen                                              &list_name));
31021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(list_name, std::string(safe_browsing_util::kPhishingList));
31121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(safe_browsing_util::PHISH,
31221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen            safe_browsing_util::GetListId(list_name));
31321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
31421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL,
31521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen                                              &list_name));
31621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinUrlList));
31721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(safe_browsing_util::BINURL,
31821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen            safe_browsing_util::GetListId(list_name));
31921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
32021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
32121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINHASH,
32221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen                                              &list_name));
32321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinHashList));
32421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(safe_browsing_util::BINHASH,
32521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen            safe_browsing_util::GetListId(list_name));
32621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
32721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
32821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// Since the ids are saved in file, we need to make sure they don't change.
32921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// Since only the last bit of each id is saved in file together with
33021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// chunkids, this checks only last bit.
33121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian MonsenTEST(SafeBrowsingUtilTest, ListIdVerification) {
33221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(0, safe_browsing_util::MALWARE % 2);
33321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(1, safe_browsing_util::PHISH % 2);
33421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(0, safe_browsing_util::BINURL %2);
33521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  EXPECT_EQ(1, safe_browsing_util::BINHASH % 2);
33621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
337dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
338ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenTEST(SafeBrowsingUtilTest, StringToSBFullHashAndSBFullHashToString) {
339dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // 31 chars plus the last \0 as full_hash.
340dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const std::string hash_in = "12345678902234567890323456789012";
341dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SBFullHash hash_out;
342dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  safe_browsing_util::StringToSBFullHash(hash_in, &hash_out);
343dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  EXPECT_EQ(0x34333231, hash_out.prefix);
344dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  EXPECT_EQ(0, memcmp(hash_in.data(), hash_out.full_hash, sizeof(SBFullHash)));
345ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
346ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::string hash_final = safe_browsing_util::SBFullHashToString(hash_out);
347ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  EXPECT_EQ(hash_in, hash_final);
348dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
349