1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/renderer/safe_browsing/phishing_classifier.h"
6
7#include <string>
8
9#include "base/bind.h"
10#include "base/command_line.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/strings/string16.h"
13#include "base/strings/utf_string_conversions.h"
14#include "chrome/common/chrome_switches.h"
15#include "chrome/common/safe_browsing/client_model.pb.h"
16#include "chrome/common/safe_browsing/csd.pb.h"
17#include "chrome/renderer/safe_browsing/features.h"
18#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
19#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
20#include "chrome/renderer/safe_browsing/scorer.h"
21#include "chrome/test/base/in_process_browser_test.h"
22#include "chrome/test/base/ui_test_utils.h"
23#include "content/public/renderer/render_view.h"
24#include "crypto/sha2.h"
25#include "net/dns/mock_host_resolver.h"
26#include "net/test/embedded_test_server/embedded_test_server.h"
27#include "net/test/embedded_test_server/http_response.h"
28#include "testing/gmock/include/gmock/gmock.h"
29#include "url/gurl.h"
30
31using ::testing::AllOf;
32using ::testing::Contains;
33using ::testing::Not;
34using ::testing::Pair;
35
namespace {

// Routing ID of the RenderView handed to the classifier under test.  The
// first RenderFrame is routing ID 1, and the first RenderView is 2.
const int kRenderViewRoutingId = 2;

}  // namespace
42
43namespace safe_browsing {
44
45class PhishingClassifierTest : public InProcessBrowserTest {
46 protected:
47  PhishingClassifierTest()
48      : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
49        page_link_domain_phishing_(features::kPageLinkDomain +
50                                   std::string("phishing.com")),
51        page_term_login_(features::kPageTerm + std::string("login")) {
52  }
53
54  virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
55    command_line->AppendSwitch(switches::kSingleProcess);
56#if defined(OS_WIN)
57    // Don't want to try to create a GPU process.
58    command_line->AppendSwitch(switches::kDisableGpu);
59#endif
60  }
61
62  virtual void SetUpOnMainThread() OVERRIDE {
63    // Construct a model to test with.  We include one feature from each of
64    // the feature extractors, which allows us to verify that they all ran.
65    ClientSideModel model;
66
67    model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
68    model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
69    model.add_hashes(crypto::SHA256HashString(page_term_login_));
70    model.add_hashes(crypto::SHA256HashString("login"));
71    model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
72                                              std::string("net")));
73    model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
74                                              std::string("phishing.com")));
75    model.add_hashes(crypto::SHA256HashString(features::kPageTerm +
76                                              std::string("login")));
77    model.add_hashes(crypto::SHA256HashString("login"));
78
79    // Add a default rule with a non-phishy weight.
80    ClientSideModel::Rule* rule = model.add_rule();
81    rule->set_weight(-1.0);
82
83    // To give a phishy score, the total weight needs to be >= 0
84    // (0.5 when converted to a probability).  This will only happen
85    // if all of the listed features are present.
86    rule = model.add_rule();
87    rule->add_feature(0);
88    rule->add_feature(1);
89    rule->add_feature(2);
90    rule->set_weight(1.0);
91
92    model.add_page_term(3);
93    model.set_murmur_hash_seed(2777808611U);
94    model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed()));
95    model.set_max_words_per_term(1);
96    model.set_max_shingles_per_page(100);
97    model.set_shingle_size(3);
98
99    clock_ = new MockFeatureExtractorClock;
100    scorer_.reset(Scorer::Create(model.SerializeAsString()));
101    ASSERT_TRUE(scorer_.get());
102
103    classifier_.reset(new PhishingClassifier(
104        content::RenderView::FromRoutingID(kRenderViewRoutingId),
105        clock_));
106  }
107
108  virtual void TearDownOnMainThread() OVERRIDE {
109    content::RunAllPendingInMessageLoop();
110  }
111
112  // Helper method to start phishing classification and wait for it to
113  // complete.  Returns the true if the page is classified as phishy and
114  // false otherwise.
115  bool RunPhishingClassifier(const base::string16* page_text,
116                             float* phishy_score,
117                             FeatureMap* features) {
118    ClientPhishingRequest verdict;
119    // The classifier accesses the RenderView and must run in the RenderThread.
120    PostTaskToInProcessRendererAndWait(
121        base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier,
122                   base::Unretained(this),
123                   page_text, phishy_score, features, &verdict));
124    return verdict.is_phishing();
125  }
126
127  void DoRunPhishingClassifier(const base::string16* page_text,
128                               float* phishy_score,
129                               FeatureMap* features,
130                               ClientPhishingRequest* verdict) {
131    *phishy_score = PhishingClassifier::kInvalidScore;
132    features->Clear();
133
134    // Force synchronous behavior for ease of unittesting.
135    base::RunLoop run_loop;
136    classifier_->BeginClassification(
137        page_text,
138        base::Bind(&PhishingClassifierTest::ClassificationFinished,
139                   base::Unretained(this), &run_loop, verdict));
140    content::RunThisRunLoop(&run_loop);
141
142    *phishy_score = verdict->client_score();
143    for (int i = 0; i < verdict->feature_map_size(); ++i) {
144      features->AddRealFeature(verdict->feature_map(i).name(),
145                               verdict->feature_map(i).value());
146    }
147  }
148
149  // Completion callback for classification.
150  void ClassificationFinished(base::RunLoop* run_loop,
151                              ClientPhishingRequest* verdict_out,
152                              const ClientPhishingRequest& verdict) {
153    *verdict_out = verdict;  // Copy the verdict.
154    run_loop->Quit();
155  }
156
157  scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
158  net::test_server::EmbeddedTestServer* embedded_test_server() {
159    // TODO(ajwong): Merge this into BrowserTestBase.
160    if (!embedded_test_server_) {
161      embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
162      embedded_test_server_->RegisterRequestHandler(
163          base::Bind(&PhishingClassifierTest::HandleRequest,
164                     base::Unretained(this)));
165      CHECK(embedded_test_server_->InitializeAndWaitUntilReady());
166    }
167    return embedded_test_server_.get();
168  }
169
170  void LoadHtml(const std::string& host, const std::string& content) {
171    GURL::Replacements replace_host;
172    replace_host.SetHostStr(host);
173    response_content_ = content;
174    ui_test_utils::NavigateToURL(
175        browser(),
176        embedded_test_server()->base_url().ReplaceComponents(replace_host));
177  }
178
179  void LoadHtmlPost(const std::string& host, const std::string& content) {
180    GURL::Replacements replace_host;
181    replace_host.SetHostStr(host);
182    response_content_ = content;
183    ui_test_utils::NavigateToURLWithPost(
184        browser(),
185        embedded_test_server()->base_url().ReplaceComponents(replace_host));
186  }
187
188  scoped_ptr<net::test_server::HttpResponse>
189      HandleRequest(const net::test_server::HttpRequest& request) {
190    scoped_ptr<net::test_server::BasicHttpResponse> http_response(
191        new net::test_server::BasicHttpResponse());
192    http_response->set_code(net::HTTP_OK);
193    http_response->set_content_type("text/html");
194    http_response->set_content(response_content_);
195    return http_response.PassAs<net::test_server::HttpResponse>();
196  }
197
198  std::string response_content_;
199  scoped_ptr<Scorer> scorer_;
200  scoped_ptr<PhishingClassifier> classifier_;
201  MockFeatureExtractorClock* clock_;  // Owned by classifier_.
202
203  // Features that are in the model.
204  const std::string url_tld_token_net_;
205  const std::string page_link_domain_phishing_;
206  const std::string page_term_login_;
207};
208
209// This test flakes on Mac with force compositing mode.
210// http://crbug.com/316709
211#if defined(OS_MACOSX)
212#define MAYBE_TestClassification DISABLED_TestClassification
213#else
214#define MAYBE_TestClassification TestClassification
215#endif
216IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) {
217  host_resolver()->AddRule("*", "127.0.0.1");
218
219  // No scorer yet, so the classifier is not ready.
220  ASSERT_FALSE(classifier_->is_ready());
221
222  // Now set the scorer.
223  classifier_->set_phishing_scorer(scorer_.get());
224  ASSERT_TRUE(classifier_->is_ready());
225
226  // This test doesn't exercise the extraction timing.
227  EXPECT_CALL(*clock_, Now())
228      .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
229
230  base::string16 page_text = base::ASCIIToUTF16("login");
231  float phishy_score;
232  FeatureMap features;
233
234  LoadHtml("host.net",
235      "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
236  EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
237  // Note: features.features() might contain other features that simply aren't
238  // in the model.
239  EXPECT_THAT(features.features(),
240              AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
241                    Contains(Pair(page_link_domain_phishing_, 1.0)),
242                    Contains(Pair(page_term_login_, 1.0))));
243  EXPECT_FLOAT_EQ(0.5, phishy_score);
244
245  // Change the link domain to something non-phishy.
246  LoadHtml("host.net",
247           "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
248  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
249  EXPECT_THAT(features.features(),
250              AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
251                    Contains(Pair(page_term_login_, 1.0))));
252  EXPECT_THAT(features.features(),
253              Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
254  EXPECT_GE(phishy_score, 0.0);
255  EXPECT_LT(phishy_score, 0.5);
256
257  // Extraction should fail for this case since there is no TLD.
258  LoadHtml("localhost", "<html><body>content</body></html>");
259  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
260  EXPECT_EQ(0U, features.features().size());
261  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
262
263  // Extraction should also fail for this case because the URL is not http.
264  net::SpawnedTestServer https_server(
265      net::SpawnedTestServer::TYPE_HTTPS,
266      net::SpawnedTestServer::kLocalhost,
267      base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
268  ASSERT_TRUE(https_server.Start());
269  std::string host_str("host.net");  // Must outlive replace_host.
270  GURL::Replacements replace_host;
271  replace_host.SetHostStr(host_str);
272  GURL test_url = https_server.GetURL("/files/title1.html");
273  ui_test_utils::NavigateToURL(browser(),
274                               test_url.ReplaceComponents(replace_host));
275  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
276  EXPECT_EQ(0U, features.features().size());
277  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
278
279  // Extraction should fail for this case because the URL is a POST request.
280  LoadHtmlPost("host.net", "<html><body>content</body></html>");
281  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
282  EXPECT_EQ(0U, features.features().size());
283  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
284}
285
286// Test flakes with LSAN enabled. See http://crbug.com/373155.
287#if defined(LEAK_SANITIZER)
288#define MAYBE_DisableDetection DISABLED_DisableDetection
289#else
290#define MAYBE_DisableDetection DisableDetection
291#endif
292IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_DisableDetection) {
293  // No scorer yet, so the classifier is not ready.
294  EXPECT_FALSE(classifier_->is_ready());
295
296  // Now set the scorer.
297  classifier_->set_phishing_scorer(scorer_.get());
298  EXPECT_TRUE(classifier_->is_ready());
299
300  // Set a NULL scorer, which turns detection back off.
301  classifier_->set_phishing_scorer(NULL);
302  EXPECT_FALSE(classifier_->is_ready());
303}
304
305}  // namespace safe_browsing
306