phishing_classifier_browsertest.cc revision a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/renderer/safe_browsing/phishing_classifier.h" 6 7#include <string> 8 9#include "base/bind.h" 10#include "base/command_line.h" 11#include "base/memory/scoped_ptr.h" 12#include "base/strings/string16.h" 13#include "base/strings/utf_string_conversions.h" 14#include "chrome/common/chrome_switches.h" 15#include "chrome/common/safe_browsing/client_model.pb.h" 16#include "chrome/common/safe_browsing/csd.pb.h" 17#include "chrome/renderer/safe_browsing/features.h" 18#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 19#include "chrome/renderer/safe_browsing/murmurhash3_util.h" 20#include "chrome/renderer/safe_browsing/scorer.h" 21#include "chrome/test/base/in_process_browser_test.h" 22#include "chrome/test/base/ui_test_utils.h" 23#include "content/public/renderer/render_view.h" 24#include "crypto/sha2.h" 25#include "net/dns/mock_host_resolver.h" 26#include "net/test/embedded_test_server/embedded_test_server.h" 27#include "net/test/embedded_test_server/http_response.h" 28#include "testing/gmock/include/gmock/gmock.h" 29#include "url/gurl.h" 30 31using ::testing::AllOf; 32using ::testing::Contains; 33using ::testing::Not; 34using ::testing::Pair; 35 36namespace safe_browsing { 37 38class PhishingClassifierTest : public InProcessBrowserTest { 39 protected: 40 PhishingClassifierTest() 41 : url_tld_token_net_(features::kUrlTldToken + std::string("net")), 42 page_link_domain_phishing_(features::kPageLinkDomain + 43 std::string("phishing.com")), 44 page_term_login_(features::kPageTerm + std::string("login")) { 45 } 46 47 virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE { 48 command_line->AppendSwitch(switches::kSingleProcess); 49#if defined(OS_WIN) && defined(USE_AURA) 50 // Don't want to try to create a GPU process. 51 command_line->AppendSwitch(switches::kDisableAcceleratedCompositing); 52#endif 53 } 54 55 virtual void SetUpOnMainThread() OVERRIDE { 56 // Construct a model to test with. We include one feature from each of 57 // the feature extractors, which allows us to verify that they all ran. 58 ClientSideModel model; 59 60 model.add_hashes(crypto::SHA256HashString(url_tld_token_net_)); 61 model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_)); 62 model.add_hashes(crypto::SHA256HashString(page_term_login_)); 63 model.add_hashes(crypto::SHA256HashString("login")); 64 model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken + 65 std::string("net"))); 66 model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain + 67 std::string("phishing.com"))); 68 model.add_hashes(crypto::SHA256HashString(features::kPageTerm + 69 std::string("login"))); 70 model.add_hashes(crypto::SHA256HashString("login")); 71 72 // Add a default rule with a non-phishy weight. 73 ClientSideModel::Rule* rule = model.add_rule(); 74 rule->set_weight(-1.0); 75 76 // To give a phishy score, the total weight needs to be >= 0 77 // (0.5 when converted to a probability). This will only happen 78 // if all of the listed features are present. 79 rule = model.add_rule(); 80 rule->add_feature(0); 81 rule->add_feature(1); 82 rule->add_feature(2); 83 rule->set_weight(1.0); 84 85 model.add_page_term(3); 86 model.set_murmur_hash_seed(2777808611U); 87 model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed())); 88 model.set_max_words_per_term(1); 89 90 clock_ = new MockFeatureExtractorClock; 91 scorer_.reset(Scorer::Create(model.SerializeAsString())); 92 ASSERT_TRUE(scorer_.get()); 93 94 classifier_.reset(new PhishingClassifier( 95 content::RenderView::FromRoutingID(1), 96 clock_)); 97 } 98 99 virtual void TearDownOnMainThread() OVERRIDE { 100 content::RunAllPendingInMessageLoop(); 101 } 102 103 // Helper method to start phishing classification and wait for it to 104 // complete. Returns the true if the page is classified as phishy and 105 // false otherwise. 106 bool RunPhishingClassifier(const base::string16* page_text, 107 float* phishy_score, 108 FeatureMap* features) { 109 ClientPhishingRequest verdict; 110 // The classifier accesses the RenderView and must run in the RenderThread. 111 PostTaskToInProcessRendererAndWait( 112 base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier, 113 base::Unretained(this), 114 page_text, phishy_score, features, &verdict)); 115 return verdict.is_phishing(); 116 } 117 118 void DoRunPhishingClassifier(const base::string16* page_text, 119 float* phishy_score, 120 FeatureMap* features, 121 ClientPhishingRequest* verdict) { 122 *phishy_score = PhishingClassifier::kInvalidScore; 123 features->Clear(); 124 125 // Force synchronous behavior for ease of unittesting. 126 base::RunLoop run_loop; 127 classifier_->BeginClassification( 128 page_text, 129 base::Bind(&PhishingClassifierTest::ClassificationFinished, 130 base::Unretained(this), &run_loop, verdict)); 131 content::RunThisRunLoop(&run_loop); 132 133 *phishy_score = verdict->client_score(); 134 for (int i = 0; i < verdict->feature_map_size(); ++i) { 135 features->AddRealFeature(verdict->feature_map(i).name(), 136 verdict->feature_map(i).value()); 137 } 138 } 139 140 // Completion callback for classification. 141 void ClassificationFinished(base::RunLoop* run_loop, 142 ClientPhishingRequest* verdict_out, 143 const ClientPhishingRequest& verdict) { 144 *verdict_out = verdict; // Copy the verdict. 145 run_loop->Quit(); 146 } 147 148 scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_; 149 net::test_server::EmbeddedTestServer* embedded_test_server() { 150 // TODO(ajwong): Merge this into BrowserTestBase. 151 if (!embedded_test_server_) { 152 embedded_test_server_.reset(new net::test_server::EmbeddedTestServer()); 153 embedded_test_server_->RegisterRequestHandler( 154 base::Bind(&PhishingClassifierTest::HandleRequest, 155 base::Unretained(this))); 156 CHECK(embedded_test_server_->InitializeAndWaitUntilReady()); 157 } 158 return embedded_test_server_.get(); 159 } 160 161 void LoadHtml(const std::string& host, const std::string& content) { 162 GURL::Replacements replace_host; 163 replace_host.SetHostStr(host); 164 response_content_ = content; 165 ui_test_utils::NavigateToURL( 166 browser(), 167 embedded_test_server()->base_url().ReplaceComponents(replace_host)); 168 } 169 170 void LoadHtmlPost(const std::string& host, const std::string& content) { 171 GURL::Replacements replace_host; 172 replace_host.SetHostStr(host); 173 response_content_ = content; 174 ui_test_utils::NavigateToURLWithPost( 175 browser(), 176 embedded_test_server()->base_url().ReplaceComponents(replace_host)); 177 } 178 179 scoped_ptr<net::test_server::HttpResponse> 180 HandleRequest(const net::test_server::HttpRequest& request) { 181 scoped_ptr<net::test_server::BasicHttpResponse> http_response( 182 new net::test_server::BasicHttpResponse()); 183 http_response->set_code(net::HTTP_OK); 184 http_response->set_content_type("text/html"); 185 http_response->set_content(response_content_); 186 return http_response.PassAs<net::test_server::HttpResponse>(); 187 } 188 189 std::string response_content_; 190 scoped_ptr<Scorer> scorer_; 191 scoped_ptr<PhishingClassifier> classifier_; 192 MockFeatureExtractorClock* clock_; // Owned by classifier_. 193 194 // Features that are in the model. 195 const std::string url_tld_token_net_; 196 const std::string page_link_domain_phishing_; 197 const std::string page_term_login_; 198}; 199 200// This test flakes on Mac with force compositing mode. 201// http://crbug.com/316709 202#if defined(OS_MACOSX) 203#define MAYBE_TestClassification DISABLED_TestClassification 204#else 205#define MAYBE_TestClassification TestClassification 206#endif 207IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) { 208 host_resolver()->AddRule("*", "127.0.0.1"); 209 210 // No scorer yet, so the classifier is not ready. 211 ASSERT_FALSE(classifier_->is_ready()); 212 213 // Now set the scorer. 214 classifier_->set_phishing_scorer(scorer_.get()); 215 ASSERT_TRUE(classifier_->is_ready()); 216 217 // This test doesn't exercise the extraction timing. 218 EXPECT_CALL(*clock_, Now()) 219 .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); 220 221 base::string16 page_text = ASCIIToUTF16("login"); 222 float phishy_score; 223 FeatureMap features; 224 225 LoadHtml("host.net", 226 "<html><body><a href=\"http://phishing.com/\">login</a></body></html>"); 227 EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 228 // Note: features.features() might contain other features that simply aren't 229 // in the model. 230 EXPECT_THAT(features.features(), 231 AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 232 Contains(Pair(page_link_domain_phishing_, 1.0)), 233 Contains(Pair(page_term_login_, 1.0)))); 234 EXPECT_FLOAT_EQ(0.5, phishy_score); 235 236 // Change the link domain to something non-phishy. 237 LoadHtml("host.net", 238 "<html><body><a href=\"http://safe.com/\">login</a></body></html>"); 239 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 240 EXPECT_THAT(features.features(), 241 AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 242 Contains(Pair(page_term_login_, 1.0)))); 243 EXPECT_THAT(features.features(), 244 Not(Contains(Pair(page_link_domain_phishing_, 1.0)))); 245 EXPECT_GE(phishy_score, 0.0); 246 EXPECT_LT(phishy_score, 0.5); 247 248 // Extraction should fail for this case since there is no TLD. 249 LoadHtml("localhost", "<html><body>content</body></html>"); 250 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 251 EXPECT_EQ(0U, features.features().size()); 252 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 253 254 // Extraction should also fail for this case because the URL is not http. 255 net::SpawnedTestServer https_server( 256 net::SpawnedTestServer::TYPE_HTTPS, 257 net::SpawnedTestServer::kLocalhost, 258 base::FilePath(FILE_PATH_LITERAL("chrome/test/data"))); 259 ASSERT_TRUE(https_server.Start()); 260 std::string host_str("host.net"); // Must outlive replace_host. 261 GURL::Replacements replace_host; 262 replace_host.SetHostStr(host_str); 263 GURL test_url = https_server.GetURL("/files/title1.html"); 264 ui_test_utils::NavigateToURL(browser(), 265 test_url.ReplaceComponents(replace_host)); 266 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 267 EXPECT_EQ(0U, features.features().size()); 268 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 269 270 // Extraction should fail for this case because the URL is a POST request. 271 LoadHtmlPost("host.net", "<html><body>content</body></html>"); 272 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 273 EXPECT_EQ(0U, features.features().size()); 274 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 275} 276 277IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, DisableDetection) { 278 // No scorer yet, so the classifier is not ready. 279 EXPECT_FALSE(classifier_->is_ready()); 280 281 // Now set the scorer. 282 classifier_->set_phishing_scorer(scorer_.get()); 283 EXPECT_TRUE(classifier_->is_ready()); 284 285 // Set a NULL scorer, which turns detection back off. 286 classifier_->set_phishing_scorer(NULL); 287 EXPECT_FALSE(classifier_->is_ready()); 288} 289 290} // namespace safe_browsing 291