phishing_classifier_browsertest.cc revision a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/renderer/safe_browsing/phishing_classifier.h"
6
7#include <string>
8
9#include "base/bind.h"
10#include "base/command_line.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/strings/string16.h"
13#include "base/strings/utf_string_conversions.h"
14#include "chrome/common/chrome_switches.h"
15#include "chrome/common/safe_browsing/client_model.pb.h"
16#include "chrome/common/safe_browsing/csd.pb.h"
17#include "chrome/renderer/safe_browsing/features.h"
18#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
19#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
20#include "chrome/renderer/safe_browsing/scorer.h"
21#include "chrome/test/base/in_process_browser_test.h"
22#include "chrome/test/base/ui_test_utils.h"
23#include "content/public/renderer/render_view.h"
24#include "crypto/sha2.h"
25#include "net/dns/mock_host_resolver.h"
26#include "net/test/embedded_test_server/embedded_test_server.h"
27#include "net/test/embedded_test_server/http_response.h"
28#include "testing/gmock/include/gmock/gmock.h"
29#include "url/gurl.h"
30
31using ::testing::AllOf;
32using ::testing::Contains;
33using ::testing::Not;
34using ::testing::Pair;
35
36namespace safe_browsing {
37
38class PhishingClassifierTest : public InProcessBrowserTest {
39 protected:
40  PhishingClassifierTest()
41      : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
42        page_link_domain_phishing_(features::kPageLinkDomain +
43                                   std::string("phishing.com")),
44        page_term_login_(features::kPageTerm + std::string("login")) {
45  }
46
47  virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
48    command_line->AppendSwitch(switches::kSingleProcess);
49#if defined(OS_WIN) && defined(USE_AURA)
50    // Don't want to try to create a GPU process.
51    command_line->AppendSwitch(switches::kDisableAcceleratedCompositing);
52#endif
53  }
54
55  virtual void SetUpOnMainThread() OVERRIDE {
56    // Construct a model to test with.  We include one feature from each of
57    // the feature extractors, which allows us to verify that they all ran.
58    ClientSideModel model;
59
60    model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
61    model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
62    model.add_hashes(crypto::SHA256HashString(page_term_login_));
63    model.add_hashes(crypto::SHA256HashString("login"));
64    model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
65                                              std::string("net")));
66    model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
67                                              std::string("phishing.com")));
68    model.add_hashes(crypto::SHA256HashString(features::kPageTerm +
69                                              std::string("login")));
70    model.add_hashes(crypto::SHA256HashString("login"));
71
72    // Add a default rule with a non-phishy weight.
73    ClientSideModel::Rule* rule = model.add_rule();
74    rule->set_weight(-1.0);
75
76    // To give a phishy score, the total weight needs to be >= 0
77    // (0.5 when converted to a probability).  This will only happen
78    // if all of the listed features are present.
79    rule = model.add_rule();
80    rule->add_feature(0);
81    rule->add_feature(1);
82    rule->add_feature(2);
83    rule->set_weight(1.0);
84
85    model.add_page_term(3);
86    model.set_murmur_hash_seed(2777808611U);
87    model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed()));
88    model.set_max_words_per_term(1);
89
90    clock_ = new MockFeatureExtractorClock;
91    scorer_.reset(Scorer::Create(model.SerializeAsString()));
92    ASSERT_TRUE(scorer_.get());
93
94    classifier_.reset(new PhishingClassifier(
95        content::RenderView::FromRoutingID(1),
96        clock_));
97  }
98
99  virtual void TearDownOnMainThread() OVERRIDE {
100    content::RunAllPendingInMessageLoop();
101  }
102
103  // Helper method to start phishing classification and wait for it to
104  // complete.  Returns the true if the page is classified as phishy and
105  // false otherwise.
106  bool RunPhishingClassifier(const base::string16* page_text,
107                             float* phishy_score,
108                             FeatureMap* features) {
109    ClientPhishingRequest verdict;
110    // The classifier accesses the RenderView and must run in the RenderThread.
111    PostTaskToInProcessRendererAndWait(
112        base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier,
113                   base::Unretained(this),
114                   page_text, phishy_score, features, &verdict));
115    return verdict.is_phishing();
116  }
117
118  void DoRunPhishingClassifier(const base::string16* page_text,
119                               float* phishy_score,
120                               FeatureMap* features,
121                               ClientPhishingRequest* verdict) {
122    *phishy_score = PhishingClassifier::kInvalidScore;
123    features->Clear();
124
125    // Force synchronous behavior for ease of unittesting.
126    base::RunLoop run_loop;
127    classifier_->BeginClassification(
128        page_text,
129        base::Bind(&PhishingClassifierTest::ClassificationFinished,
130                   base::Unretained(this), &run_loop, verdict));
131    content::RunThisRunLoop(&run_loop);
132
133    *phishy_score = verdict->client_score();
134    for (int i = 0; i < verdict->feature_map_size(); ++i) {
135      features->AddRealFeature(verdict->feature_map(i).name(),
136                               verdict->feature_map(i).value());
137    }
138  }
139
140  // Completion callback for classification.
141  void ClassificationFinished(base::RunLoop* run_loop,
142                              ClientPhishingRequest* verdict_out,
143                              const ClientPhishingRequest& verdict) {
144    *verdict_out = verdict;  // Copy the verdict.
145    run_loop->Quit();
146  }
147
148  scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
149  net::test_server::EmbeddedTestServer* embedded_test_server() {
150    // TODO(ajwong): Merge this into BrowserTestBase.
151    if (!embedded_test_server_) {
152      embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
153      embedded_test_server_->RegisterRequestHandler(
154          base::Bind(&PhishingClassifierTest::HandleRequest,
155                     base::Unretained(this)));
156      CHECK(embedded_test_server_->InitializeAndWaitUntilReady());
157    }
158    return embedded_test_server_.get();
159  }
160
161  void LoadHtml(const std::string& host, const std::string& content) {
162    GURL::Replacements replace_host;
163    replace_host.SetHostStr(host);
164    response_content_ = content;
165    ui_test_utils::NavigateToURL(
166        browser(),
167        embedded_test_server()->base_url().ReplaceComponents(replace_host));
168  }
169
170  void LoadHtmlPost(const std::string& host, const std::string& content) {
171    GURL::Replacements replace_host;
172    replace_host.SetHostStr(host);
173    response_content_ = content;
174    ui_test_utils::NavigateToURLWithPost(
175        browser(),
176        embedded_test_server()->base_url().ReplaceComponents(replace_host));
177  }
178
179  scoped_ptr<net::test_server::HttpResponse>
180      HandleRequest(const net::test_server::HttpRequest& request) {
181    scoped_ptr<net::test_server::BasicHttpResponse> http_response(
182        new net::test_server::BasicHttpResponse());
183    http_response->set_code(net::HTTP_OK);
184    http_response->set_content_type("text/html");
185    http_response->set_content(response_content_);
186    return http_response.PassAs<net::test_server::HttpResponse>();
187  }
188
189  std::string response_content_;
190  scoped_ptr<Scorer> scorer_;
191  scoped_ptr<PhishingClassifier> classifier_;
192  MockFeatureExtractorClock* clock_;  // Owned by classifier_.
193
194  // Features that are in the model.
195  const std::string url_tld_token_net_;
196  const std::string page_link_domain_phishing_;
197  const std::string page_term_login_;
198};
199
// TestClassification is flaky on Mac with force compositing mode, so it is
// compiled under a DISABLED_ name there and under its real name elsewhere.
// http://crbug.com/316709
#if !defined(OS_MACOSX)
#define MAYBE_TestClassification TestClassification
#else
#define MAYBE_TestClassification DISABLED_TestClassification
#endif
207IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) {
208  host_resolver()->AddRule("*", "127.0.0.1");
209
210  // No scorer yet, so the classifier is not ready.
211  ASSERT_FALSE(classifier_->is_ready());
212
213  // Now set the scorer.
214  classifier_->set_phishing_scorer(scorer_.get());
215  ASSERT_TRUE(classifier_->is_ready());
216
217  // This test doesn't exercise the extraction timing.
218  EXPECT_CALL(*clock_, Now())
219      .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
220
221  base::string16 page_text = ASCIIToUTF16("login");
222  float phishy_score;
223  FeatureMap features;
224
225  LoadHtml("host.net",
226      "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
227  EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
228  // Note: features.features() might contain other features that simply aren't
229  // in the model.
230  EXPECT_THAT(features.features(),
231              AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
232                    Contains(Pair(page_link_domain_phishing_, 1.0)),
233                    Contains(Pair(page_term_login_, 1.0))));
234  EXPECT_FLOAT_EQ(0.5, phishy_score);
235
236  // Change the link domain to something non-phishy.
237  LoadHtml("host.net",
238           "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
239  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
240  EXPECT_THAT(features.features(),
241              AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
242                    Contains(Pair(page_term_login_, 1.0))));
243  EXPECT_THAT(features.features(),
244              Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
245  EXPECT_GE(phishy_score, 0.0);
246  EXPECT_LT(phishy_score, 0.5);
247
248  // Extraction should fail for this case since there is no TLD.
249  LoadHtml("localhost", "<html><body>content</body></html>");
250  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
251  EXPECT_EQ(0U, features.features().size());
252  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
253
254  // Extraction should also fail for this case because the URL is not http.
255  net::SpawnedTestServer https_server(
256      net::SpawnedTestServer::TYPE_HTTPS,
257      net::SpawnedTestServer::kLocalhost,
258      base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
259  ASSERT_TRUE(https_server.Start());
260  std::string host_str("host.net");  // Must outlive replace_host.
261  GURL::Replacements replace_host;
262  replace_host.SetHostStr(host_str);
263  GURL test_url = https_server.GetURL("/files/title1.html");
264  ui_test_utils::NavigateToURL(browser(),
265                               test_url.ReplaceComponents(replace_host));
266  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
267  EXPECT_EQ(0U, features.features().size());
268  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
269
270  // Extraction should fail for this case because the URL is a POST request.
271  LoadHtmlPost("host.net", "<html><body>content</body></html>");
272  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
273  EXPECT_EQ(0U, features.features().size());
274  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
275}
276
277IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, DisableDetection) {
278  // No scorer yet, so the classifier is not ready.
279  EXPECT_FALSE(classifier_->is_ready());
280
281  // Now set the scorer.
282  classifier_->set_phishing_scorer(scorer_.get());
283  EXPECT_TRUE(classifier_->is_ready());
284
285  // Set a NULL scorer, which turns detection back off.
286  classifier_->set_phishing_scorer(NULL);
287  EXPECT_FALSE(classifier_->is_ready());
288}
289
290}  // namespace safe_browsing
291