phishing_classifier_browsertest.cc revision cedac228d2dd51db4b79ea1e72c7f249408ee061
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/phishing_classifier.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h" 10f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/command_line.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h" 127d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "base/strings/string16.h" 13868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h" 14f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/common/chrome_switches.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/client_model.pb.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/csd.pb.h" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/features.h" 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/murmurhash3_util.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/scorer.h" 21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/in_process_browser_test.h" 22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/ui_test_utils.h" 23f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/renderer/render_view.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "crypto/sha2.h" 25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/dns/mock_host_resolver.h" 26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/embedded_test_server.h" 27f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/http_response.h" 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "testing/gmock/include/gmock/gmock.h" 29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "url/gurl.h" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::AllOf; 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Contains; 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Not; 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Pair; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)namespace { 375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// The first RenderFrame is routing ID 1, and the first RenderView is 2. 395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)const int kRenderViewRoutingId = 2; 405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing { 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class PhishingClassifierTest : public InProcessBrowserTest { 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected: 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PhishingClassifierTest() 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : url_tld_token_net_(features::kUrlTldToken + std::string("net")), 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) page_link_domain_phishing_(features::kPageLinkDomain + 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("phishing.com")), 51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) page_term_login_(features::kPageTerm + std::string("login")) { 52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE { 55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) command_line->AppendSwitch(switches::kSingleProcess); 565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#if defined(OS_WIN) 57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Don't want to try to create a GPU process. 580529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch command_line->AppendSwitch(switches::kDisableGpu); 59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif 60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) virtual void SetUpOnMainThread() OVERRIDE { 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Construct a model to test with. We include one feature from each of 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the feature extractors, which allows us to verify that they all ran. 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ClientSideModel model; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(url_tld_token_net_)); 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_)); 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(page_term_login_)); 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString("login")); 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken + 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("net"))); 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain + 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("phishing.com"))); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString(features::kPageTerm + 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string("login"))); 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_hashes(crypto::SHA256HashString("login")); 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Add a default rule with a non-phishy weight. 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ClientSideModel::Rule* rule = model.add_rule(); 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->set_weight(-1.0); 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // To give a phishy score, the total weight needs to be >= 0 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (0.5 when converted to a probability). This will only happen 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // if all of the listed features are present. 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule = model.add_rule(); 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->add_feature(0); 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->add_feature(1); 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->add_feature(2); 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rule->set_weight(1.0); 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_page_term(3); 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.set_murmur_hash_seed(2777808611U); 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed())); 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) model.set_max_words_per_term(1); 96cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) model.set_max_shingles_per_page(100); 97cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) model.set_shingle_size(3); 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) clock_ = new MockFeatureExtractorClock; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scorer_.reset(Scorer::Create(model.SerializeAsString())); 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ASSERT_TRUE(scorer_.get()); 102f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 103f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) classifier_.reset(new PhishingClassifier( 1045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) content::RenderView::FromRoutingID(kRenderViewRoutingId), 105f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) clock_)); 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 108f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) virtual void TearDownOnMainThread() OVERRIDE { 109f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) content::RunAllPendingInMessageLoop(); 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Helper method to start phishing classification and wait for it to 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // complete. Returns the true if the page is classified as phishy and 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // false otherwise. 115a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) bool RunPhishingClassifier(const base::string16* page_text, 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float* phishy_score, 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FeatureMap* features) { 118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ClientPhishingRequest verdict; 119f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // The classifier accesses the RenderView and must run in the RenderThread. 120f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) PostTaskToInProcessRendererAndWait( 121f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier, 122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Unretained(this), 123f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) page_text, phishy_score, features, &verdict)); 124f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return verdict.is_phishing(); 125f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 126f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 127a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) void DoRunPhishingClassifier(const base::string16* page_text, 128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) float* phishy_score, 129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) FeatureMap* features, 130f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ClientPhishingRequest* verdict) { 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *phishy_score = PhishingClassifier::kInvalidScore; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) features->Clear(); 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 134f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Force synchronous behavior for ease of unittesting. 135f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::RunLoop run_loop; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->BeginClassification( 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) page_text, 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Bind(&PhishingClassifierTest::ClassificationFinished, 139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Unretained(this), &run_loop, verdict)); 140f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) content::RunThisRunLoop(&run_loop); 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *phishy_score = verdict->client_score(); 143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) for (int i = 0; i < verdict->feature_map_size(); ++i) { 144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) features->AddRealFeature(verdict->feature_map(i).name(), 145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) verdict->feature_map(i).value()); 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Completion callback for classification. 150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void ClassificationFinished(base::RunLoop* run_loop, 151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ClientPhishingRequest* verdict_out, 152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) const ClientPhishingRequest& verdict) { 153f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) *verdict_out = verdict; // Copy the verdict. 154f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) run_loop->Quit(); 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_; 158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::test_server::EmbeddedTestServer* embedded_test_server() { 159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // TODO(ajwong): Merge this into BrowserTestBase. 160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if (!embedded_test_server_) { 161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server_.reset(new net::test_server::EmbeddedTestServer()); 162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server_->RegisterRequestHandler( 163f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Bind(&PhishingClassifierTest::HandleRequest, 164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Unretained(this))); 165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) CHECK(embedded_test_server_->InitializeAndWaitUntilReady()); 166f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 167f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return embedded_test_server_.get(); 168f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 169f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 170f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void LoadHtml(const std::string& host, const std::string& content) { 171f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL::Replacements replace_host; 172f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) replace_host.SetHostStr(host); 173f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) response_content_ = content; 174f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ui_test_utils::NavigateToURL( 175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) browser(), 176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server()->base_url().ReplaceComponents(replace_host)); 177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void LoadHtmlPost(const std::string& host, const std::string& content) { 180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL::Replacements replace_host; 181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) replace_host.SetHostStr(host); 182f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) response_content_ = content; 183f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ui_test_utils::NavigateToURLWithPost( 184f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) browser(), 185f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) embedded_test_server()->base_url().ReplaceComponents(replace_host)); 186f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 187f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 188f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<net::test_server::HttpResponse> 189f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) HandleRequest(const net::test_server::HttpRequest& request) { 190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<net::test_server::BasicHttpResponse> http_response( 191f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) new net::test_server::BasicHttpResponse()); 192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) http_response->set_code(net::HTTP_OK); 193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) http_response->set_content_type("text/html"); 194f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) http_response->set_content(response_content_); 195f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return http_response.PassAs<net::test_server::HttpResponse>(); 196f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 197f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 198f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) std::string response_content_; 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<Scorer> scorer_; 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<PhishingClassifier> classifier_; 201f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) MockFeatureExtractorClock* clock_; // Owned by classifier_. 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Features that are in the model. 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string url_tld_token_net_; 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string page_link_domain_phishing_; 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string page_term_login_; 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 209f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// This test flakes on Mac with force compositing mode. 210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// http://crbug.com/316709 211f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#if defined(OS_MACOSX) 212f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define MAYBE_TestClassification DISABLED_TestClassification 213f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#else 214f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define MAYBE_TestClassification TestClassification 215f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif 216f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) { 217f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) host_resolver()->AddRule("*", "127.0.0.1"); 218f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // No scorer yet, so the classifier is not ready. 220f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_FALSE(classifier_->is_ready()); 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Now set the scorer. 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->set_phishing_scorer(scorer_.get()); 224f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_TRUE(classifier_->is_ready()); 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This test doesn't exercise the extraction timing. 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_CALL(*clock_, Now()) 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::string16 page_text = base::ASCIIToUTF16("login"); 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float phishy_score; 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FeatureMap features; 233f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 234f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtml("host.net", 235f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "<html><body><a href=\"http://phishing.com/\">login</a></body></html>"); 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note: features.features() might contain other features that simply aren't 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in the model. 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_THAT(features.features(), 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Contains(Pair(page_link_domain_phishing_, 1.0)), 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Contains(Pair(page_term_login_, 1.0)))); 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FLOAT_EQ(0.5, phishy_score); 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Change the link domain to something non-phishy. 246f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtml("host.net", 247f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) "<html><body><a href=\"http://safe.com/\">login</a></body></html>"); 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_THAT(features.features(), 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Contains(Pair(page_term_login_, 1.0)))); 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_THAT(features.features(), 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Not(Contains(Pair(page_link_domain_phishing_, 1.0)))); 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_GE(phishy_score, 0.0); 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_LT(phishy_score, 0.5); 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 257f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Extraction should fail for this case since there is no TLD. 258f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtml("localhost", "<html><body>content</body></html>"); 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(0U, features.features().size()); 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 263f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Extraction should also fail for this case because the URL is not http. 264f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::SpawnedTestServer https_server( 265f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::SpawnedTestServer::TYPE_HTTPS, 266f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) net::SpawnedTestServer::kLocalhost, 267f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::FilePath(FILE_PATH_LITERAL("chrome/test/data"))); 268f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ASSERT_TRUE(https_server.Start()); 269f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) std::string host_str("host.net"); // Must outlive replace_host. 270f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL::Replacements replace_host; 271f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) replace_host.SetHostStr(host_str); 272f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) GURL test_url = https_server.GetURL("/files/title1.html"); 273f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) ui_test_utils::NavigateToURL(browser(), 274f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) test_url.ReplaceComponents(replace_host)); 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(0U, features.features().size()); 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Extraction should fail for this case because the URL is a POST request. 280f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) LoadHtmlPost("host.net", "<html><body>content</body></html>"); 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(0U, features.features().size()); 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 286cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)// Test flakes with LSAN enabled. See http://crbug.com/373155. 287cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#if defined(LEAK_SANITIZER) 288cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#define MAYBE_DisableDetection DISABLED_DisableDetection 289cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#else 290cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#define MAYBE_DisableDetection DisableDetection 291cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#endif 292cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_DisableDetection) { 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // No scorer yet, so the classifier is not ready. 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(classifier_->is_ready()); 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Now set the scorer. 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->set_phishing_scorer(scorer_.get()); 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_TRUE(classifier_->is_ready()); 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Set a NULL scorer, which turns detection back off. 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) classifier_->set_phishing_scorer(NULL); 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EXPECT_FALSE(classifier_->is_ready()); 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace safe_browsing 306