// phishing_classifier_browsertest.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// Copyright (c) 2012 The Chromium Authors. All rights reserved. 269e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// Use of this source code is governed by a BSD-style license that can be 369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// found in the LICENSE file. 469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// 569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// Note that although this is not a "browser" test, it runs as part of 669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// browser_tests. This is because WebKit does not work properly if it is 769e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// shutdown and re-initialized. Since browser_tests runs each test in a 869e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal// new process, this avoids the problem. 969e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal 1069e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "chrome/renderer/safe_browsing/phishing_classifier.h" 1169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal 1269e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include <string> 1369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal 1469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "base/bind.h" 1569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "base/memory/scoped_ptr.h" 1669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "base/string16.h" 1769e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "base/utf_string_conversions.h" 1869e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "chrome/common/safe_browsing/client_model.pb.h" 1969e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "chrome/common/safe_browsing/csd.pb.h" 2069e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "chrome/renderer/safe_browsing/features.h" 2169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 2269e17611504376e4d4603925f8528dfc890fd2c6Luis 
Sigal#include "chrome/renderer/safe_browsing/murmurhash3_util.h" 2369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "chrome/renderer/safe_browsing/scorer.h" 2469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "content/public/test/render_view_fake_resources_test.h" 2569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "crypto/sha2.h" 2669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal#include "testing/gmock/include/gmock/gmock.h" 2769e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal 2869e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalusing ::testing::AllOf; 2969e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalusing ::testing::Contains; 3069e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalusing ::testing::Not; 3169e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalusing ::testing::Pair; 3269e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal 3369e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalnamespace safe_browsing { 3469e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal 3569e17611504376e4d4603925f8528dfc890fd2c6Luis Sigalclass PhishingClassifierTest : public content::RenderViewFakeResourcesTest { 3669e17611504376e4d4603925f8528dfc890fd2c6Luis Sigal protected: 37 PhishingClassifierTest() 38 : url_tld_token_net_(features::kUrlTldToken + std::string("net")), 39 page_link_domain_phishing_(features::kPageLinkDomain + 40 std::string("phishing.com")), 41 page_term_login_(features::kPageTerm + std::string("login")) {} 42 43 virtual void SetUp() { 44 // Set up WebKit and the RenderView. 45 content::RenderViewFakeResourcesTest::SetUp(); 46 47 // Construct a model to test with. We include one feature from each of 48 // the feature extractors, which allows us to verify that they all ran. 
49 ClientSideModel model; 50 51 model.add_hashes(crypto::SHA256HashString(url_tld_token_net_)); 52 model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_)); 53 model.add_hashes(crypto::SHA256HashString(page_term_login_)); 54 model.add_hashes(crypto::SHA256HashString("login")); 55 model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken + 56 std::string("net"))); 57 model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain + 58 std::string("phishing.com"))); 59 model.add_hashes(crypto::SHA256HashString(features::kPageTerm + 60 std::string("login"))); 61 model.add_hashes(crypto::SHA256HashString("login")); 62 63 // Add a default rule with a non-phishy weight. 64 ClientSideModel::Rule* rule = model.add_rule(); 65 rule->set_weight(-1.0); 66 67 // To give a phishy score, the total weight needs to be >= 0 68 // (0.5 when converted to a probability). This will only happen 69 // if all of the listed features are present. 70 rule = model.add_rule(); 71 rule->add_feature(0); 72 rule->add_feature(1); 73 rule->add_feature(2); 74 rule->set_weight(1.0); 75 76 model.add_page_term(3); 77 model.set_murmur_hash_seed(2777808611U); 78 model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed())); 79 model.set_max_words_per_term(1); 80 81 clock_ = new MockFeatureExtractorClock; 82 scorer_.reset(Scorer::Create(model.SerializeAsString())); 83 ASSERT_TRUE(scorer_.get()); 84 classifier_.reset(new PhishingClassifier(view(), clock_)); 85 } 86 87 virtual void TearDown() { 88 content::RenderViewFakeResourcesTest::TearDown(); 89 } 90 91 // Helper method to start phishing classification and wait for it to 92 // complete. Returns the true if the page is classified as phishy and 93 // false otherwise. 
94 bool RunPhishingClassifier(const string16* page_text, 95 float* phishy_score, 96 FeatureMap* features) { 97 verdict_.Clear(); 98 *phishy_score = PhishingClassifier::kInvalidScore; 99 features->Clear(); 100 101 classifier_->BeginClassification( 102 page_text, 103 base::Bind(&PhishingClassifierTest::ClassificationFinished, 104 base::Unretained(this))); 105 message_loop_.Run(); 106 107 *phishy_score = verdict_.client_score(); 108 for (int i = 0; i < verdict_.feature_map_size(); ++i) { 109 features->AddRealFeature(verdict_.feature_map(i).name(), 110 verdict_.feature_map(i).value()); 111 } 112 return verdict_.is_phishing(); 113 } 114 115 // Completion callback for classification. 116 void ClassificationFinished(const ClientPhishingRequest& verdict) { 117 verdict_ = verdict; // copy the verdict. 118 message_loop_.Quit(); 119 } 120 121 scoped_ptr<Scorer> scorer_; 122 scoped_ptr<PhishingClassifier> classifier_; 123 MockFeatureExtractorClock* clock_; // owned by classifier_ 124 125 // Features that are in the model. 126 const std::string url_tld_token_net_; 127 const std::string page_link_domain_phishing_; 128 const std::string page_term_login_; 129 130 // This member holds the status from the most recent call to the 131 // ClassificationFinished callback. 132 ClientPhishingRequest verdict_; 133}; 134 135TEST_F(PhishingClassifierTest, TestClassification) { 136 // No scorer yet, so the classifier is not ready. 137 EXPECT_FALSE(classifier_->is_ready()); 138 139 // Now set the scorer. 140 classifier_->set_phishing_scorer(scorer_.get()); 141 EXPECT_TRUE(classifier_->is_ready()); 142 143 // This test doesn't exercise the extraction timing. 
144 EXPECT_CALL(*clock_, Now()) 145 .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); 146 147 responses_["http://host.net/"] = 148 "<html><body><a href=\"http://phishing.com/\">login</a></body></html>"; 149 LoadURL("http://host.net/"); 150 151 string16 page_text = ASCIIToUTF16("login"); 152 float phishy_score; 153 FeatureMap features; 154 EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 155 // Note: features.features() might contain other features that simply aren't 156 // in the model. 157 EXPECT_THAT(features.features(), 158 AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 159 Contains(Pair(page_link_domain_phishing_, 1.0)), 160 Contains(Pair(page_term_login_, 1.0)))); 161 EXPECT_FLOAT_EQ(0.5, phishy_score); 162 163 // Change the link domain to something non-phishy. 164 responses_["http://host.net/"] = 165 "<html><body><a href=\"http://safe.com/\">login</a></body></html>"; 166 LoadURL("http://host.net/"); 167 168 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 169 EXPECT_THAT(features.features(), 170 AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 171 Contains(Pair(page_term_login_, 1.0)))); 172 EXPECT_THAT(features.features(), 173 Not(Contains(Pair(page_link_domain_phishing_, 1.0)))); 174 EXPECT_GE(phishy_score, 0.0); 175 EXPECT_LT(phishy_score, 0.5); 176 177 // Extraction should fail for this case, since there is no TLD. 178 responses_["http://localhost/"] = "<html><body>content</body></html>"; 179 LoadURL("http://localhost/"); 180 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 181 EXPECT_EQ(0U, features.features().size()); 182 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 183 184 // Extraction should also fail for this case, because the URL is not http. 
185 responses_["https://host.net/"] = "<html><body>secure</body></html>"; 186 LoadURL("https://host.net/"); 187 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 188 EXPECT_EQ(0U, features.features().size()); 189 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 190 191 // Extraction should fail for this case because the URL is a POST request. 192 LoadURLWithPost("http://host.net/"); 193 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 194 EXPECT_EQ(0U, features.features().size()); 195 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 196} 197 198TEST_F(PhishingClassifierTest, DisableDetection) { 199 // No scorer yet, so the classifier is not ready. 200 EXPECT_FALSE(classifier_->is_ready()); 201 202 // Now set the scorer. 203 classifier_->set_phishing_scorer(scorer_.get()); 204 EXPECT_TRUE(classifier_->is_ready()); 205 206 // Set a NULL scorer, which turns detection back off. 207 classifier_->set_phishing_scorer(NULL); 208 EXPECT_FALSE(classifier_->is_ready()); 209} 210 211} // namespace safe_browsing 212