15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/phishing_classifier.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h"
10f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/command_line.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h"
127d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "base/strings/string16.h"
13868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h"
14f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/common/chrome_switches.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/client_model.pb.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/csd.pb.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/features.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/scorer.h"
21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/in_process_browser_test.h"
22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/ui_test_utils.h"
23f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/renderer/render_view.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "crypto/sha2.h"
25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/dns/mock_host_resolver.h"
26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/embedded_test_server.h"
27f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/http_response.h"
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "testing/gmock/include/gmock/gmock.h"
29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "url/gurl.h"
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::AllOf;
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Contains;
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Not;
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Pair;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
namespace {

// Routing ID of the RenderView the classifier under test is attached to.
// Routing ID 1 belongs to the first RenderFrame, which makes the first
// RenderView number 2.
const int kRenderViewRoutingId = 2;

}  // namespace
425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class PhishingClassifierTest : public InProcessBrowserTest {
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected:
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  PhishingClassifierTest()
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        page_link_domain_phishing_(features::kPageLinkDomain +
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   std::string("phishing.com")),
51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        page_term_login_(features::kPageTerm + std::string("login")) {
52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    command_line->AppendSwitch(switches::kSingleProcess);
565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#if defined(OS_WIN)
57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // Don't want to try to create a GPU process.
580529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch    command_line->AppendSwitch(switches::kDisableGpu);
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif
60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  virtual void SetUpOnMainThread() OVERRIDE {
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Construct a model to test with.  We include one feature from each of
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // the feature extractors, which allows us to verify that they all ran.
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ClientSideModel model;
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString(page_term_login_));
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString("login"));
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                              std::string("net")));
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                              std::string("phishing.com")));
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString(features::kPageTerm +
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                              std::string("login")));
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_hashes(crypto::SHA256HashString("login"));
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Add a default rule with a non-phishy weight.
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ClientSideModel::Rule* rule = model.add_rule();
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rule->set_weight(-1.0);
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // To give a phishy score, the total weight needs to be >= 0
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // (0.5 when converted to a probability).  This will only happen
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // if all of the listed features are present.
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rule = model.add_rule();
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rule->add_feature(0);
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rule->add_feature(1);
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rule->add_feature(2);
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rule->set_weight(1.0);
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_page_term(3);
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.set_murmur_hash_seed(2777808611U);
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed()));
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    model.set_max_words_per_term(1);
96cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    model.set_max_shingles_per_page(100);
97cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    model.set_shingle_size(3);
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    clock_ = new MockFeatureExtractorClock;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    scorer_.reset(Scorer::Create(model.SerializeAsString()));
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ASSERT_TRUE(scorer_.get());
102f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
103f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    classifier_.reset(new PhishingClassifier(
1045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        content::RenderView::FromRoutingID(kRenderViewRoutingId),
105f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        clock_));
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
108f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  virtual void TearDownOnMainThread() OVERRIDE {
109f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    content::RunAllPendingInMessageLoop();
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helper method to start phishing classification and wait for it to
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // complete.  Returns the true if the page is classified as phishy and
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // false otherwise.
115a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  bool RunPhishingClassifier(const base::string16* page_text,
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             float* phishy_score,
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             FeatureMap* features) {
118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    ClientPhishingRequest verdict;
119f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // The classifier accesses the RenderView and must run in the RenderThread.
120f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    PostTaskToInProcessRendererAndWait(
121f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier,
122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   base::Unretained(this),
123f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   page_text, phishy_score, features, &verdict));
124f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return verdict.is_phishing();
125f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
126f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
127a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  void DoRunPhishingClassifier(const base::string16* page_text,
128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                               float* phishy_score,
129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                               FeatureMap* features,
130f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                               ClientPhishingRequest* verdict) {
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *phishy_score = PhishingClassifier::kInvalidScore;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    features->Clear();
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
134f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // Force synchronous behavior for ease of unittesting.
135f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    base::RunLoop run_loop;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    classifier_->BeginClassification(
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        page_text,
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::Bind(&PhishingClassifierTest::ClassificationFinished,
139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   base::Unretained(this), &run_loop, verdict));
140f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    content::RunThisRunLoop(&run_loop);
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    *phishy_score = verdict->client_score();
143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    for (int i = 0; i < verdict->feature_map_size(); ++i) {
144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      features->AddRealFeature(verdict->feature_map(i).name(),
145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                               verdict->feature_map(i).value());
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Completion callback for classification.
150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void ClassificationFinished(base::RunLoop* run_loop,
151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              ClientPhishingRequest* verdict_out,
152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              const ClientPhishingRequest& verdict) {
153f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    *verdict_out = verdict;  // Copy the verdict.
154f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    run_loop->Quit();
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  net::test_server::EmbeddedTestServer* embedded_test_server() {
159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // TODO(ajwong): Merge this into BrowserTestBase.
160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if (!embedded_test_server_) {
161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      embedded_test_server_->RegisterRequestHandler(
163f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          base::Bind(&PhishingClassifierTest::HandleRequest,
164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     base::Unretained(this)));
165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      CHECK(embedded_test_server_->InitializeAndWaitUntilReady());
166f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
167f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return embedded_test_server_.get();
168f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
169f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
170f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void LoadHtml(const std::string& host, const std::string& content) {
171f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    GURL::Replacements replace_host;
172f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    replace_host.SetHostStr(host);
173f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    response_content_ = content;
174f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    ui_test_utils::NavigateToURL(
175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        browser(),
176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        embedded_test_server()->base_url().ReplaceComponents(replace_host));
177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void LoadHtmlPost(const std::string& host, const std::string& content) {
180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    GURL::Replacements replace_host;
181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    replace_host.SetHostStr(host);
182f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    response_content_ = content;
183f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    ui_test_utils::NavigateToURLWithPost(
184f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        browser(),
185f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        embedded_test_server()->base_url().ReplaceComponents(replace_host));
186f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
187f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
188f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  scoped_ptr<net::test_server::HttpResponse>
189f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      HandleRequest(const net::test_server::HttpRequest& request) {
190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    scoped_ptr<net::test_server::BasicHttpResponse> http_response(
191f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        new net::test_server::BasicHttpResponse());
192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    http_response->set_code(net::HTTP_OK);
193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    http_response->set_content_type("text/html");
194f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    http_response->set_content(response_content_);
195f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return http_response.PassAs<net::test_server::HttpResponse>();
196f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
197f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
198f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::string response_content_;
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_ptr<Scorer> scorer_;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_ptr<PhishingClassifier> classifier_;
201f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  MockFeatureExtractorClock* clock_;  // Owned by classifier_.
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Features that are in the model.
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string url_tld_token_net_;
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string page_link_domain_phishing_;
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string page_term_login_;
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
209f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// This test flakes on Mac with force compositing mode.
210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// http://crbug.com/316709
211f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#if defined(OS_MACOSX)
212f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define MAYBE_TestClassification DISABLED_TestClassification
213f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#else
214f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define MAYBE_TestClassification TestClassification
215f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif
216f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) {
217f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  host_resolver()->AddRule("*", "127.0.0.1");
218f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // No scorer yet, so the classifier is not ready.
220f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ASSERT_FALSE(classifier_->is_ready());
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Now set the scorer.
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  classifier_->set_phishing_scorer(scorer_.get());
224f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ASSERT_TRUE(classifier_->is_ready());
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This test doesn't exercise the extraction timing.
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(*clock_, Now())
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  base::string16 page_text = base::ASCIIToUTF16("login");
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  float phishy_score;
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap features;
233f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
234f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml("host.net",
235f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note: features.features() might contain other features that simply aren't
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // in the model.
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_THAT(features.features(),
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    Contains(Pair(page_link_domain_phishing_, 1.0)),
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    Contains(Pair(page_term_login_, 1.0))));
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FLOAT_EQ(0.5, phishy_score);
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Change the link domain to something non-phishy.
246f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml("host.net",
247f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)           "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_THAT(features.features(),
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    Contains(Pair(page_term_login_, 1.0))));
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_THAT(features.features(),
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_GE(phishy_score, 0.0);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_LT(phishy_score, 0.5);
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
257f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Extraction should fail for this case since there is no TLD.
258f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml("localhost", "<html><body>content</body></html>");
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0U, features.features().size());
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
263f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Extraction should also fail for this case because the URL is not http.
264f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  net::SpawnedTestServer https_server(
265f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      net::SpawnedTestServer::TYPE_HTTPS,
266f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      net::SpawnedTestServer::kLocalhost,
267f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
268f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ASSERT_TRUE(https_server.Start());
269f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::string host_str("host.net");  // Must outlive replace_host.
270f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  GURL::Replacements replace_host;
271f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  replace_host.SetHostStr(host_str);
272f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  GURL test_url = https_server.GetURL("/files/title1.html");
273f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ui_test_utils::NavigateToURL(browser(),
274f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                               test_url.ReplaceComponents(replace_host));
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0U, features.features().size());
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Extraction should fail for this case because the URL is a POST request.
280f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtmlPost("host.net", "<html><body>content</body></html>");
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0U, features.features().size());
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
286cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)// Test flakes with LSAN enabled. See http://crbug.com/373155.
287cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#if defined(LEAK_SANITIZER)
288cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#define MAYBE_DisableDetection DISABLED_DisableDetection
289cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#else
290cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#define MAYBE_DisableDetection DisableDetection
291cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#endif
292cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_DisableDetection) {
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // No scorer yet, so the classifier is not ready.
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(classifier_->is_ready());
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Now set the scorer.
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  classifier_->set_phishing_scorer(scorer_.get());
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(classifier_->is_ready());
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set a NULL scorer, which turns detection back off.
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  classifier_->set_phishing_scorer(NULL);
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(classifier_->is_ready());
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace safe_browsing
306