phishing_classifier_browsertest.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/phishing_classifier.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h" 10f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/command_line.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h" 127d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "base/strings/string16.h" 13868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h" 14f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/common/chrome_switches.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/client_model.pb.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/csd.pb.h" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/features.h" 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/murmurhash3_util.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/scorer.h" 21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/in_process_browser_test.h" 22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/ui_test_utils.h" 23f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/renderer/render_view.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "crypto/sha2.h" 25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/dns/mock_host_resolver.h" 26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/embedded_test_server.h" 27f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/http_response.h" 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "testing/gmock/include/gmock/gmock.h" 29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "url/gurl.h" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::AllOf; 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Contains; 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Not; 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Pair; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)namespace { 375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// The first RenderFrame is routing ID 1, and the first RenderView is 2. 395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)const int kRenderViewRoutingId = 2; 405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing { 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class PhishingClassifierTest : public InProcessBrowserTest { 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected: 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PhishingClassifierTest() 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : url_tld_token_net_(features::kUrlTldToken + std::string("net")), 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) page_link_domain_phishing_(features::kPageLinkDomain + 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("phishing.com")), 51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) page_term_login_(features::kPageTerm + std::string("login")) { 52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE { 55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) command_line->AppendSwitch(switches::kSingleProcess); 565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#if defined(OS_WIN) 57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Don't want to try to create a GPU process. 58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) command_line->AppendSwitch(switches::kDisableAcceleratedCompositing); 59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif 60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) virtual void SetUpOnMainThread() OVERRIDE { 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Construct a model to test with. We include one feature from each of 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the feature extractors, which allows us to verify that they all ran. 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ClientSideModel model; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(url_tld_token_net_)); 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_)); 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(page_term_login_)); 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString("login")); 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken + 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("net"))); 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain + 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("phishing.com"))); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(features::kPageTerm + 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("login"))); 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString("login")); 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Add a default rule with a non-phishy weight. 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ClientSideModel::Rule* rule = model.add_rule(); 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->set_weight(-1.0); 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // To give a phishy score, the total weight needs to be >= 0 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (0.5 when converted to a probability). This will only happen 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // if all of the listed features are present. 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule = model.add_rule(); 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->add_feature(0); 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->add_feature(1); 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->add_feature(2); 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->set_weight(1.0); 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_page_term(3); 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.set_murmur_hash_seed(2777808611U); 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed())); 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.set_max_words_per_term(1); 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) clock_ = new MockFeatureExtractorClock; 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scorer_.reset(Scorer::Create(model.SerializeAsString())); 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ASSERT_TRUE(scorer_.get()); 100f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 101f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) classifier_.reset(new PhishingClassifier( 1025d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) content::RenderView::FromRoutingID(kRenderViewRoutingId), 103f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) clock_)); 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 106f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) virtual void TearDownOnMainThread() OVERRIDE { 107f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) content::RunAllPendingInMessageLoop(); 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Helper method to start phishing classification and wait for it to 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // complete. Returns the true if the page is classified as phishy and 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // false otherwise. 113a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) bool RunPhishingClassifier(const base::string16* page_text, 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float* phishy_score, 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FeatureMap* features) { 116f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ClientPhishingRequest verdict; 117f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // The classifier accesses the RenderView and must run in the RenderThread. 118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) PostTaskToInProcessRendererAndWait( 119f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier, 120f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Unretained(this), 121f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) page_text, phishy_score, features, &verdict)); 122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return verdict.is_phishing(); 123f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 124f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 125a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) void DoRunPhishingClassifier(const base::string16* page_text, 126f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) float* phishy_score, 127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) FeatureMap* features, 128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ClientPhishingRequest* verdict) { 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *phishy_score = PhishingClassifier::kInvalidScore; 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) features->Clear(); 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Force synchronous behavior for ease of unittesting. 133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::RunLoop run_loop; 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->BeginClassification( 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) page_text, 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&PhishingClassifierTest::ClassificationFinished, 137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Unretained(this), &run_loop, verdict)); 138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) content::RunThisRunLoop(&run_loop); 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 140f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *phishy_score = verdict->client_score(); 141f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) for (int i = 0; i < verdict->feature_map_size(); ++i) { 142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) features->AddRealFeature(verdict->feature_map(i).name(), 143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) verdict->feature_map(i).value()); 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Completion callback for classification. 148f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void ClassificationFinished(base::RunLoop* run_loop, 149f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ClientPhishingRequest* verdict_out, 150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) const ClientPhishingRequest& verdict) { 151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *verdict_out = verdict; // Copy the verdict. 152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) run_loop->Quit(); 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_; 156f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::test_server::EmbeddedTestServer* embedded_test_server() { 157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // TODO(ajwong): Merge this into BrowserTestBase. 158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if (!embedded_test_server_) { 159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server_.reset(new net::test_server::EmbeddedTestServer()); 160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server_->RegisterRequestHandler( 161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Bind(&PhishingClassifierTest::HandleRequest, 162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Unretained(this))); 163f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) CHECK(embedded_test_server_->InitializeAndWaitUntilReady()); 164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return embedded_test_server_.get(); 166f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 167f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 168f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void LoadHtml(const std::string& host, const std::string& content) { 169f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL::Replacements replace_host; 170f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) replace_host.SetHostStr(host); 171f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) response_content_ = content; 172f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ui_test_utils::NavigateToURL( 173f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) browser(), 174f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server()->base_url().ReplaceComponents(replace_host)); 175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void LoadHtmlPost(const std::string& host, const std::string& content) { 178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL::Replacements replace_host; 179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) replace_host.SetHostStr(host); 180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) response_content_ = content; 181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ui_test_utils::NavigateToURLWithPost( 182f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) browser(), 183f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server()->base_url().ReplaceComponents(replace_host)); 184f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 185f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 186f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<net::test_server::HttpResponse> 187f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) HandleRequest(const net::test_server::HttpRequest& request) { 188f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<net::test_server::BasicHttpResponse> http_response( 189f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) new net::test_server::BasicHttpResponse()); 190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) http_response->set_code(net::HTTP_OK); 191f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) http_response->set_content_type("text/html"); 192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) http_response->set_content(response_content_); 193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return http_response.PassAs<net::test_server::HttpResponse>(); 194f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 195f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 196f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) std::string response_content_; 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<Scorer> scorer_; 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<PhishingClassifier> classifier_; 199f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) MockFeatureExtractorClock* clock_; // Owned by classifier_. 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Features that are in the model. 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string url_tld_token_net_; 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string page_link_domain_phishing_; 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string page_term_login_; 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 207f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// This test flakes on Mac with force compositing mode. 208f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// http://crbug.com/316709 209f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#if defined(OS_MACOSX) 210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define MAYBE_TestClassification DISABLED_TestClassification 211f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#else 212f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define MAYBE_TestClassification TestClassification 213f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif 214f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) { 215f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) host_resolver()->AddRule("*", "127.0.0.1"); 216f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // No scorer yet, so the classifier is not ready. 218f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_FALSE(classifier_->is_ready()); 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Now set the scorer. 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->set_phishing_scorer(scorer_.get()); 222f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_TRUE(classifier_->is_ready()); 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This test doesn't exercise the extraction timing. 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_CALL(*clock_, Now()) 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::string16 page_text = base::ASCIIToUTF16("login"); 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float phishy_score; 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FeatureMap features; 231f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 232f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtml("host.net", 233f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "<html><body><a href=\"http://phishing.com/\">login</a></body></html>"); 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note: features.features() might contain other features that simply aren't 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in the model. 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_THAT(features.features(), 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Contains(Pair(page_link_domain_phishing_, 1.0)), 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Contains(Pair(page_term_login_, 1.0)))); 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FLOAT_EQ(0.5, phishy_score); 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Change the link domain to something non-phishy. 244f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtml("host.net", 245f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "<html><body><a href=\"http://safe.com/\">login</a></body></html>"); 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_THAT(features.features(), 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Contains(Pair(page_term_login_, 1.0)))); 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_THAT(features.features(), 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Not(Contains(Pair(page_link_domain_phishing_, 1.0)))); 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_GE(phishy_score, 0.0); 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_LT(phishy_score, 0.5); 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 255f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Extraction should fail for this case since there is no TLD. 256f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtml("localhost", "<html><body>content</body></html>"); 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(0U, features.features().size()); 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 261f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Extraction should also fail for this case because the URL is not http. 262f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::SpawnedTestServer https_server( 263f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::SpawnedTestServer::TYPE_HTTPS, 264f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::SpawnedTestServer::kLocalhost, 265f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::FilePath(FILE_PATH_LITERAL("chrome/test/data"))); 266f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_TRUE(https_server.Start()); 267f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) std::string host_str("host.net"); // Must outlive replace_host. 268f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL::Replacements replace_host; 269f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) replace_host.SetHostStr(host_str); 270f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL test_url = https_server.GetURL("/files/title1.html"); 271f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ui_test_utils::NavigateToURL(browser(), 272f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) test_url.ReplaceComponents(replace_host)); 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(0U, features.features().size()); 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Extraction should fail for this case because the URL is a POST request. 278f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtmlPost("host.net", "<html><body>content</body></html>"); 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(0U, features.features().size()); 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 284f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, DisableDetection) { 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // No scorer yet, so the classifier is not ready. 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(classifier_->is_ready()); 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Now set the scorer. 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->set_phishing_scorer(scorer_.get()); 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_TRUE(classifier_->is_ready()); 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Set a NULL scorer, which turns detection back off. 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->set_phishing_scorer(NULL); 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(classifier_->is_ready()); 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace safe_browsing 298