// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h"
129ab5563a3196760eb381d102cbb2bc0f7abc6a50Ben Murdoch#include "base/message_loop/message_loop.h"
13868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/stringprintf.h"
14eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "base/time/time.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/history_backend.h"
162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/history/history_service.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/history_service_factory.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/profiles/profile.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/browser_features.h"
20f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/browser/safe_browsing/client_side_detection_host.h"
21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/browser/safe_browsing/database_manager.h"
22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/browser/safe_browsing/safe_browsing_service.h"
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/safe_browsing/ui_manager.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/csd.pb.h"
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/test/base/chrome_render_view_host_test_harness.h"
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/test/base/testing_profile.h"
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/navigation_controller.h"
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/web_contents.h"
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/common/referrer.h"
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/test/test_browser_thread.h"
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/test/web_contents_tester.h"
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "testing/gmock/include/gmock/gmock.h"
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "testing/gtest/include/gtest/gtest.h"
341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "ui/base/page_transition_types.h"
35eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/gurl.h"
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)using content::BrowserThread;
38116680a4aac90f2aa7413d9095a592090648e557Ben Murdochusing content::ResourceType;
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::WebContentsTester;
40f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
41f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)using testing::DoAll;
42868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)using testing::Return;
43868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)using testing::StrictMock;
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing {
46f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
48f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
49f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class MockSafeBrowsingDatabaseManager : public SafeBrowsingDatabaseManager {
50f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) public:
51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  explicit MockSafeBrowsingDatabaseManager(
52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      const scoped_refptr<SafeBrowsingService>& service)
53f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      : SafeBrowsingDatabaseManager(service) { }
54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  MOCK_METHOD1(MatchMalwareIP, bool(const std::string& ip_address));
56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) protected:
58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  virtual ~MockSafeBrowsingDatabaseManager() {}
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) private:
61f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(MockSafeBrowsingDatabaseManager);
62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)};
63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
64f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class MockClientSideDetectionHost : public ClientSideDetectionHost {
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
66f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  MockClientSideDetectionHost(
67f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      content::WebContents* tab,
68f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      SafeBrowsingDatabaseManager* database_manager)
69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      : ClientSideDetectionHost(tab) {
70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    set_safe_browsing_managers(NULL, database_manager);
71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
7346d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)  virtual ~MockClientSideDetectionHost() {}
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  MOCK_METHOD1(IsBadIpAddress, bool(const std::string&));
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BrowserFeatureExtractorTest : public ChromeRenderViewHostTestHarness {
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected:
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void SetUp() {
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ChromeRenderViewHostTestHarness::SetUp();
83bbcdd45c55eb7c4641ab97aef9889b0fc828e7d3Ben Murdoch    ASSERT_TRUE(profile()->CreateHistoryService(
84bbcdd45c55eb7c4641ab97aef9889b0fc828e7d3Ben Murdoch        true /* delete_file */, false /* no_db */));
85f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
86f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    db_manager_ = new StrictMock<MockSafeBrowsingDatabaseManager>(
87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        SafeBrowsingService::CreateSafeBrowsingService());
88f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    host_.reset(new StrictMock<MockClientSideDetectionHost>(
89f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        web_contents(), db_manager_.get()));
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    extractor_.reset(
91f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        new BrowserFeatureExtractor(web_contents(), host_.get()));
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    num_pending_ = 0;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    browse_info_.reset(new BrowseInfo);
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void TearDown() {
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    extractor_.reset();
98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    host_.reset();
99f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    db_manager_ = NULL;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    profile()->DestroyHistoryService();
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ChromeRenderViewHostTestHarness::TearDown();
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ASSERT_EQ(0, num_pending_);
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HistoryService* history_service() {
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return HistoryServiceFactory::GetForProfile(profile(),
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                Profile::EXPLICIT_ACCESS);
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void SetRedirectChain(const std::vector<GURL>& redirect_chain,
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        bool new_host) {
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    browse_info_->url_redirects = redirect_chain;
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (new_host) {
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      browse_info_->host_redirects = redirect_chain;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Wrapper around NavigateAndCommit that also sets the redirect chain to
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a sane value.
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void SimpleNavigateAndCommit(const GURL& url) {
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::vector<GURL> redirect_chain;
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    redirect_chain.push_back(url);
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SetRedirectChain(redirect_chain, true);
1241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    NavigateAndCommit(url, GURL(), ui::PAGE_TRANSITION_LINK);
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is similar to NavigateAndCommit that is in WebContentsTester, but
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // allows us to specify the referrer and page_transition_type.
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void NavigateAndCommit(const GURL& url,
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         const GURL& referrer,
1311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                         ui::PageTransition type) {
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    web_contents()->GetController().LoadURL(
133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        url, content::Referrer(referrer, blink::WebReferrerPolicyDefault),
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        type, std::string());
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    static int page_id = 0;
1376e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)    content::RenderFrameHost* rfh =
1386e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)        WebContentsTester::For(web_contents())->GetPendingMainFrame();
1396e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)    if (!rfh) {
1406e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)      rfh = web_contents()->GetMainFrame();
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WebContentsTester::For(web_contents())->ProceedWithCrossSiteNavigation();
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WebContentsTester::For(web_contents())->TestDidNavigateWithReferrer(
1446e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)        rfh, ++page_id, url,
145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        content::Referrer(referrer, blink::WebReferrerPolicyDefault), type);
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ExtractFeatures(ClientPhishingRequest* request) {
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StartExtractFeatures(request);
15090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    base::MessageLoop::current()->Run();
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    EXPECT_EQ(1U, success_.count(request));
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return success_.count(request) ? success_[request] : false;
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void StartExtractFeatures(ClientPhishingRequest* request) {
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    success_.erase(request);
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++num_pending_;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    extractor_->ExtractFeatures(
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        browse_info_.get(),
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        request,
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::Bind(&BrowserFeatureExtractorTest::ExtractFeaturesDone,
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   base::Unretained(this)));
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void GetFeatureMap(const ClientPhishingRequest& request,
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     std::map<std::string, double>* features) {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int i = 0; i < request.non_model_feature_map_size(); ++i) {
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const ClientPhishingRequest::Feature& feature =
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          request.non_model_feature_map(i);
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      EXPECT_EQ(0U, features->count(feature.name()));
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (*features)[feature.name()] = feature.value();
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  void ExtractMalwareFeatures(ClientMalwareRequest* request) {
176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // Feature extraction takes ownership of the request object
177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // and passes it along to the done callback in the end.
178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    StartExtractMalwareFeatures(request);
179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    base::MessageLoopForUI::current()->Run();
180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    EXPECT_EQ(1U, success_.count(request));
181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    EXPECT_TRUE(success_[request]);
182f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
183f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
184f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void StartExtractMalwareFeatures(ClientMalwareRequest* request) {
185f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    success_.erase(request);
186f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    ++num_pending_;
187f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // We temporarily give up ownership of request to ExtractMalwareFeatures
188f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // but we'll regain ownership of it in ExtractMalwareFeaturesDone.
1892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    extractor_->ExtractMalwareFeatures(
190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        browse_info_.get(),
191f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        request,
192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        base::Bind(&BrowserFeatureExtractorTest::ExtractMalwareFeaturesDone,
193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   base::Unretained(this)));
1942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
1952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
196a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  void GetMalwareUrls(
1972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      const ClientMalwareRequest& request,
198a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      std::map<std::string, std::set<std::string> >* urls) {
199a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    for (int i = 0; i < request.bad_ip_url_info_size(); ++i) {
200a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      const ClientMalwareRequest::UrlInfo& urlinfo =
201a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)          request.bad_ip_url_info(i);
202a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      (*urls)[urlinfo.ip()].insert(urlinfo.url());
2032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
2042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
2052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
206f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  int num_pending_;  // Number of pending feature extractions.
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_ptr<BrowserFeatureExtractor> extractor_;
208f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::map<void*, bool> success_;
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_ptr<BrowseInfo> browse_info_;
210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  scoped_ptr<StrictMock<MockClientSideDetectionHost> > host_;
211f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  scoped_refptr<StrictMock<MockSafeBrowsingDatabaseManager> > db_manager_;
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
214116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  void ExtractFeaturesDone(bool success,
215116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch                           scoped_ptr<ClientPhishingRequest> request) {
216116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI));
217116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    ASSERT_EQ(0U, success_.count(request.get()));
218116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    // The pointer doesn't really belong to us.  It belongs to
219116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    // the test case which passed it to ExtractFeatures above.
220116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    success_[request.release()] = success;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (--num_pending_ == 0) {
22290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      base::MessageLoop::current()->Quit();
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
225f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
226f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void ExtractMalwareFeaturesDone(
227f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      bool success,
228f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      scoped_ptr<ClientMalwareRequest> request) {
229f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI));
230f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    ASSERT_EQ(0U, success_.count(request.get()));
231f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // The pointer doesn't really belong to us.  It belongs to
232f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // the test case which passed it to ExtractMalwareFeatures above.
233f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    success_[request.release()] = success;
234f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if (--num_pending_ == 0) {
235f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      base::MessageLoopForUI::current()->Quit();
236f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
237f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, UrlNotInHistory) {
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientPhishingRequest request;
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SimpleNavigateAndCommit(GURL("http://www.google.com"));
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.google.com/");
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(ExtractFeatures(&request));
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, RequestNotInitialized) {
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientPhishingRequest request;
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.google.com/");
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Request is missing the score value.
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SimpleNavigateAndCommit(GURL("http://www.google.com"));
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(ExtractFeatures(&request));
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, UrlInHistory) {
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("https://www.foo.com/gaa.html"),
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);  // same host HTTPS.
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.foo.com/gaa.html"),
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);  // same host HTTP.
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://bar.foo.com/gaa.html"),
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);  // different host.
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.foo.com/bar.html?a=b"),
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now() - base::TimeDelta::FromHours(23),
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             NULL, 0, GURL(), history::RedirectList(),
2721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                             ui::PAGE_TRANSITION_LINK,
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED, false);
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now() - base::TimeDelta::FromHours(25),
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             NULL, 0, GURL(), history::RedirectList(),
2771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                             ui::PAGE_TRANSITION_TYPED,
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED, false);
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("https://www.foo.com/goo.html"),
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now() - base::TimeDelta::FromDays(5),
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             NULL, 0, GURL(), history::RedirectList(),
2821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                             ui::PAGE_TRANSITION_TYPED,
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED, false);
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SimpleNavigateAndCommit(GURL("http://www.foo.com/bar.html"));
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientPhishingRequest request;
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.foo.com/bar.html");
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(ExtractFeatures(&request));
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::map<std::string, double> features;
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(12U, features.size());
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(2.0, features[features::kUrlHistoryVisitCount]);
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0,
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryTypedCount]);
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(4.0, features[features::kHttpHostVisitCount]);
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(2.0, features[features::kHttpsHostVisitCount]);
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpsHostVisitMoreThan24hAgo]);
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.Clear();
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://bar.foo.com/gaa.html");
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(ExtractFeatures(&request));
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.clear();
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We have less features because we didn't Navigate to this page, so we don't
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // have Referrer, IsFirstNavigation, HasSSLReferrer, etc.
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(7U, features.size());
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryVisitCount]);
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(0.0,
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(0.0, features[features::kUrlHistoryTypedCount]);
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kHttpHostVisitCount]);
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(0.0, features[features::kHttpsHostVisitCount]);
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(0.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(features.count(features::kFirstHttpsHostVisitMoreThan24hAgo));
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, MultipleRequestsAtOnce) {
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SimpleNavigateAndCommit(GURL("http:/www.foo.com/bar.html"));
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientPhishingRequest request;
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.foo.com/bar.html");
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartExtractFeatures(&request);
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SimpleNavigateAndCommit(GURL("http://www.foo.com/goo.html"));
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientPhishingRequest request2;
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request2.set_url("http://www.foo.com/goo.html");
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request2.set_client_score(1.0);
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StartExtractFeatures(&request2);
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  base::MessageLoop::current()->Run();
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(success_[&request]);
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Success is false because the second URL is not in the history and we are
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // not able to distinguish between a missing URL in the history and an error.
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(success_[&request2]);
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, BrowseFeatures) {
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.foo.com/"),
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.foo.com/page.html"),
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.bar.com/"),
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.bar.com/other_page.html"),
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service()->AddPage(GURL("http://www.baz.com/"),
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             base::Time::Now(),
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             history::SOURCE_BROWSED);
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientPhishingRequest request;
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.foo.com/");
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<GURL> redirect_chain;
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://somerandomwebsite.com/"));
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.foo.com/"));
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SetRedirectChain(redirect_chain, true);
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->http_status_code = 200;
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NavigateAndCommit(GURL("http://www.foo.com/"),
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    GURL("http://google.com/"),
3751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                    ui::PageTransitionFromInt(
3761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_AUTO_BOOKMARK |
3771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_FORWARD_BACK));
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(ExtractFeatures(&request));
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::map<std::string, double> features;
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
3842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s=%s",
3852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kReferrer,
3862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://google.com/")]);
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
3882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s[0]=%s",
3892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
3902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://somerandomwebsite.com/")]);
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We shouldn't have a feature for the last redirect in the chain, since it
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // should always be the URL that we navigated to.
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0,
3942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s[1]=%s",
3952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
3962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://foo.com/")]);
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(2.0, features[features::kPageTransitionType]);
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0, features[features::kIsFirstNavigation]);
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(200.0, features[features::kHttpStatusCode]);
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.Clear();
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.foo.com/page.html");
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.clear();
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.foo.com/redirect"));
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.foo.com/second_redirect"));
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SetRedirectChain(redirect_chain, false);
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->http_status_code = 404;
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NavigateAndCommit(GURL("http://www.foo.com/page.html"),
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    GURL("http://www.foo.com"),
4131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                    ui::PageTransitionFromInt(
4141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_TYPED |
4151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_CHAIN_START |
4161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_CLIENT_REDIRECT));
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(ExtractFeatures(&request));
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.clear();
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1,
4232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s=%s",
4242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kReferrer,
4252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.foo.com/")]);
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
4272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s[0]=%s",
4282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
4292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.foo.com/redirect")]);
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
4312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s[1]=%s",
4322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
4332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.foo.com/second_redirect")]);
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0, features[features::kPageTransitionType]);
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
4382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s=%s",
4392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
4402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kReferrer,
4412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://google.com/")]);
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
4432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s[0]=%s",
4442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
4452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
4462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://somerandomwebsite.com/")]);
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(2.0,
4482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s",
4492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
4502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kPageTransitionType)]);
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
4522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s",
4532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
4542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kIsFirstNavigation)]);
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(404.0, features[features::kHttpStatusCode]);
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.Clear();
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.bar.com/");
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.clear();
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.bar.com/"));
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SetRedirectChain(redirect_chain, true);
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NavigateAndCommit(GURL("http://www.bar.com/"),
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    GURL("http://www.foo.com/page.html"),
4661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                    ui::PageTransitionFromInt(
4671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_LINK |
4681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_CHAIN_END |
4691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                        ui::PAGE_TRANSITION_CLIENT_REDIRECT));
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(ExtractFeatures(&request));
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.clear();
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
4762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s=%s",
4772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kReferrer,
4782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.foo.com/page.html")]);
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
4802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s[0]=%s",
4812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
4822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.foo.com/page.html")]);
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kPageTransitionType]);
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Should not have host features.
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0U,
4892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features.count(base::StringPrintf("%s%s",
4902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kHostPrefix,
4912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kPageTransitionType)));
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0U,
4932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features.count(base::StringPrintf("%s%s",
4942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kHostPrefix,
4952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kIsFirstNavigation)));
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.Clear();
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.bar.com/other_page.html");
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.clear();
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.bar.com/other_page.html"));
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SetRedirectChain(redirect_chain, false);
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NavigateAndCommit(GURL("http://www.bar.com/other_page.html"),
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    GURL("http://www.bar.com/"),
5051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                    ui::PAGE_TRANSITION_LINK);
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(ExtractFeatures(&request));
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.clear();
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
5122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s=%s",
5132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kReferrer,
5142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.bar.com/")]);
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kPageTransitionType]);
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
5192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s=%s",
5202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
5212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kReferrer,
5222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.foo.com/page.html")]);
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
5242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s[0]=%s",
5252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
5262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
5272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        "http://www.foo.com/page.html")]);
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0,
5292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s",
5302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
5312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kPageTransitionType)]);
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0.0,
5332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s%s",
5342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kHostPrefix,
5352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kIsFirstNavigation)]);
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.Clear();
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.baz.com/");
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.clear();
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("https://bankofamerica.com"));
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  redirect_chain.push_back(GURL("http://www.baz.com/"));
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SetRedirectChain(redirect_chain, true);
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NavigateAndCommit(GURL("http://www.baz.com"),
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    GURL("https://bankofamerica.com"),
5451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci                    ui::PAGE_TRANSITION_GENERATED);
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_TRUE(ExtractFeatures(&request));
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.clear();
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0,
5522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features[base::StringPrintf("%s[0]=%s",
5532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kRedirect,
5542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        features::kSecureRedirectValue)]);
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(1.0, features[features::kHasSSLReferrer]);
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(5.0, features[features::kPageTransitionType]);
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Should not have redirect or host features.
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0U,
5592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features.count(base::StringPrintf("%s%s",
5602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kHostPrefix,
5612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kPageTransitionType)));
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(0U,
5632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            features.count(base::StringPrintf("%s%s",
5642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kHostPrefix,
5652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                              features::kIsFirstNavigation)));
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(5.0, features[features::kPageTransitionType]);
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, SafeBrowsingFeatures) {
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SimpleNavigateAndCommit(GURL("http://www.foo.com/malware.html"));
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientPhishingRequest request;
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_url("http://www.foo.com/malware.html");
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  request.set_client_score(0.5);
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  browse_info_->unsafe_resource.reset(
5762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      new SafeBrowsingUIManager::UnsafeResource);
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->unsafe_resource->url = GURL("http://www.malware.com/");
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->unsafe_resource->original_url = GURL("http://www.good.com/");
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->unsafe_resource->is_subresource = true;
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->unsafe_resource->threat_type = SB_THREAT_TYPE_URL_MALWARE;
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExtractFeatures(&request);
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::map<std::string, double> features;
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetFeatureMap(request, &features);
5852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  EXPECT_TRUE(features.count(base::StringPrintf(
5862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      "%s%s",
5872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      features::kSafeBrowsingMaliciousUrl,
5882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      "http://www.malware.com/")));
5892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  EXPECT_TRUE(features.count(base::StringPrintf(
5902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      "%s%s",
5912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       features::kSafeBrowsingOriginalUrl,
5922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        "http://www.good.com/")));
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(1.0, features[features::kSafeBrowsingIsSubresource]);
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_DOUBLE_EQ(2.0, features[features::kSafeBrowsingThreatType]);
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, MalwareFeatures) {
5982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ClientMalwareRequest request;
5992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  request.set_url("http://www.foo.com/");
6002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
601a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  std::vector<IPUrlInfo> bad_urls;
6025f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  bad_urls.push_back(
6035f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      IPUrlInfo("http://bad.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
6045f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  bad_urls.push_back(
6055f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      IPUrlInfo("http://evil.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
606a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  browse_info_->ips.insert(std::make_pair("193.5.163.8", bad_urls));
607a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  browse_info_->ips.insert(std::make_pair("92.92.92.92", bad_urls));
608a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  std::vector<IPUrlInfo> good_urls;
6095f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  good_urls.push_back(
6105f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      IPUrlInfo("http://ok.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
611a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  browse_info_->ips.insert(std::make_pair("23.94.78.1", good_urls));
612f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  EXPECT_CALL(*db_manager_, MatchMalwareIP("193.5.163.8"))
613f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      .WillOnce(Return(true));
614f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  EXPECT_CALL(*db_manager_, MatchMalwareIP("92.92.92.92"))
615f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      .WillOnce(Return(true));
616f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  EXPECT_CALL(*db_manager_, MatchMalwareIP("23.94.78.1"))
617f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      .WillOnce(Return(false));
6182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ExtractMalwareFeatures(&request);
620a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  EXPECT_EQ(4, request.bad_ip_url_info_size());
621a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  std::map<std::string, std::set<std::string> > result_urls;
622a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  GetMalwareUrls(request, &result_urls);
623a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
624a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  EXPECT_EQ(2U, result_urls.size());
625a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  EXPECT_TRUE(result_urls.count("193.5.163.8"));
626a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  std::set<std::string> urls = result_urls["193.5.163.8"];
627a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  EXPECT_EQ(2U, urls.size());
628a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
629a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
630a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  EXPECT_TRUE(result_urls.count("92.92.92.92"));
631a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  urls = result_urls["92.92.92.92"];
632a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  EXPECT_EQ(2U, urls.size());
633a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
634a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
6352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
636a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
637a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)TEST_F(BrowserFeatureExtractorTest, MalwareFeatures_ExceedLimit) {
638a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  ClientMalwareRequest request;
639a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  request.set_url("http://www.foo.com/");
640a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
641a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  std::vector<IPUrlInfo> bad_urls;
6425f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  bad_urls.push_back(
6435f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      IPUrlInfo("http://bad.com", "GET", "", content::RESOURCE_TYPE_SCRIPT));
644a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  std::vector<std::string> ips;
645a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  for (int i = 0; i < 7; ++i) {  // Add 7 ips
646a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    std::string ip = base::StringPrintf("%d.%d.%d.%d", i, i, i, i);
647a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    ips.push_back(ip);
648a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    browse_info_->ips.insert(std::make_pair(ip, bad_urls));
649a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
650f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // First ip is good but all the others are bad.
651f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    EXPECT_CALL(*db_manager_, MatchMalwareIP(ip)).WillOnce(Return(i > 0));
652a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  }
653a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
654a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  ExtractMalwareFeatures(&request);
655a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  // The number of IP matched url we store is capped at 5 IPs per request.
656a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  EXPECT_EQ(5, request.bad_ip_url_info_size());
657a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)}
658a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace safe_browsing
660