15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/client_side_detection_host.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/ref_counted.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/metrics/histogram.h"
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/prefs/pref_service.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/sequenced_task_runner_helpers.h"
155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/strings/utf_string_conversions.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/browser_process.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/profiles/profile.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/client_side_detection_service.h"
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/safe_browsing/database_manager.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/safe_browsing_service.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/pref_names.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/csd.pb.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/safe_browsing/safebrowsing_messages.h"
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/browser_thread.h"
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/navigation_controller.h"
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/navigation_details.h"
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/navigation_entry.h"
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/notification_details.h"
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/notification_source.h"
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/notification_types.h"
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/render_process_host.h"
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/render_view_host.h"
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/resource_request_details.h"
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/web_contents.h"
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/common/frame_navigate_params.h"
37effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#include "content/public/common/url_constants.h"
38eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/gurl.h"
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::BrowserThread;
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::NavigationEntry;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::ResourceRequestDetails;
43116680a4aac90f2aa7413d9095a592090648e557Ben Murdochusing content::ResourceType;
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::WebContents;
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing {
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
4946d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
525d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
53effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochtypedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
54effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This class is instantiated each time a new toplevel URL loads, and
56effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch// asynchronously checks whether the malware and phishing classifiers should run
57effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch// for this URL.  If so, it notifies the host class by calling the provided
58effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch// callback form the UI thread.  Objects of this class are ref-counted and will
59effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch// be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
60effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch// or |host| go away you need to call Cancel().  We keep the |database_manager|
61effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch// alive in a ref pointer for as long as it takes.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ClientSideDetectionHost::ShouldClassifyUrlRequest
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : public base::RefCountedThreadSafe<
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ClientSideDetectionHost::ShouldClassifyUrlRequest> {
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
66effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  ShouldClassifyUrlRequest(
67effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      const content::FrameNavigateParams& params,
68effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      const ShouldClassifyUrlCallback& start_phishing_classification,
69effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      const ShouldClassifyUrlCallback& start_malware_classification,
70effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      WebContents* web_contents,
71effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      ClientSideDetectionService* csd_service,
72effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      SafeBrowsingDatabaseManager* database_manager,
73effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      ClientSideDetectionHost* host)
74effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      : params_(params),
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        web_contents_(web_contents),
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        csd_service_(csd_service),
772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        database_manager_(database_manager),
78effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        host_(host),
79effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        start_phishing_classification_cb_(start_phishing_classification),
80effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        start_malware_classification_cb_(start_malware_classification) {
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(web_contents_);
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(csd_service_);
84868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    DCHECK(database_manager_.get());
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(host_);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Start() {
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We start by doing some simple checks that can run on the UI thread.
92effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
93effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Only classify [X]HTML documents.
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (params_.contents_mime_type != "text/html" &&
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        params_.contents_mime_type != "application/xhtml+xml") {
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              << " because it has an unsupported MIME type: "
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              << params_.contents_mime_type;
101effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              << " because of hosting on private IP: "
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              << params_.socket_address.host();
108effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
109effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
112effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // For phishing we only classify HTTP pages.
113010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (!params_.url.SchemeIs(url::kHttpScheme)) {
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
115effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch              << " because it is not HTTP: "
116effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch              << params_.socket_address.host();
117effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
118effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    }
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
120effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // Don't run any classifier if the tab is incognito.
121effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
122effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      VLOG(1) << "Skipping phishing and malware classification for URL: "
123effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch              << params_.url << " because we're browsing incognito.";
124effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
125effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We lookup the csd-whitelist before we lookup the cache because
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // a URL may have recently been whitelisted.  If the URL matches
130effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // the csd-whitelist we won't start phishing classification.  The
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // csd-whitelist check has to be done on the IO thread because it
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // uses the SafeBrowsing service class.
133effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
134effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      BrowserThread::PostTask(
135effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch          BrowserThread::IO,
136effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch          FROM_HERE,
137effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch          base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
138effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                     this, params_.url));
139effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    }
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Cancel() {
143effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
144effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    DontClassifyForMalware(NO_CLASSIFY_CANCEL);
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Just to make sure we don't do anything stupid we reset all these
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // pointers except for the safebrowsing service class which may be
147effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // accessed by CheckSafeBrowsingDatabase().
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    web_contents_ = NULL;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    csd_service_ = NULL;
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    host_ = NULL;
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  friend class base::RefCountedThreadSafe<
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ClientSideDetectionHost::ShouldClassifyUrlRequest>;
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Enum used to keep stats about why the pre-classification check failed.
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum PreClassificationCheckFailures {
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NO_CLASSIFY_PRIVATE_IP,
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NO_CLASSIFY_OFF_THE_RECORD,
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NO_CLASSIFY_MATCH_CSD_WHITELIST,
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NO_CLASSIFY_TOO_MANY_REPORTS,
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
165effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    NO_CLASSIFY_NO_DATABASE_MANAGER,
166effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    NO_CLASSIFY_KILLSWITCH,
167effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    NO_CLASSIFY_CANCEL,
168effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    NO_CLASSIFY_RESULT_FROM_CACHE,
169effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    NO_CLASSIFY_NOT_HTTP_URL,
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NO_CLASSIFY_MAX  // Always add new values before this one.
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The destructor can be called either from the UI or the IO thread.
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual ~ShouldClassifyUrlRequest() { }
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
177effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  bool ShouldClassifyForPhishing() const {
178effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
179effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    return !start_phishing_classification_cb_.is_null();
180effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
181effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
182effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  bool ShouldClassifyForMalware() const {
183effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
184effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    return !start_malware_classification_cb_.is_null();
185effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
186effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
187effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
188effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
189effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (ShouldClassifyForPhishing()) {
190effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      // Track the first reason why we stopped classifying for phishing.
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
192effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                                reason, NO_CLASSIFY_MAX);
193effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
194effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch               << reason;
195effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      start_phishing_classification_cb_.Run(false);
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
197effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    start_phishing_classification_cb_.Reset();
198effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
200effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  void DontClassifyForMalware(PreClassificationCheckFailures reason) {
201effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
202effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (ShouldClassifyForMalware()) {
203effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      // Track the first reason why we stopped classifying for malware.
204effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
205effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                                reason, NO_CLASSIFY_MAX);
206effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
207effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch               << reason;
208effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      start_malware_classification_cb_.Run(false);
209effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    }
210effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    start_malware_classification_cb_.Reset();
211effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
21258e6fbe4ee35d65e14b626c557d37565bf8ad179Ben Murdoch
213effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  void CheckSafeBrowsingDatabase(const GURL& url) {
214effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
215effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // We don't want to call the classification callbacks from the IO
216effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // thread so we simply pass the results of this method to CheckCache()
217effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // which is called on the UI thread;
218effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
219effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
220effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (!database_manager_.get()) {
221effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      // We cannot check the Safe Browsing whitelists so we stop here
222effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      // for safety.
223effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
224effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    } else {
225effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      if (database_manager_->MatchCsdWhitelistUrl(url)) {
226effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        VLOG(1) << "Skipping phishing classification for URL: " << url
227effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                << " because it matches the csd whitelist";
228effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
229effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      }
230effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      if (database_manager_->IsMalwareKillSwitchOn()) {
231effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        malware_reason = NO_CLASSIFY_KILLSWITCH;
232effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      }
233effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    }
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    BrowserThread::PostTask(
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        BrowserThread::UI,
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        FROM_HERE,
237effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        base::Bind(&ShouldClassifyUrlRequest::CheckCache,
238effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   this,
239effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   phishing_reason,
240effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   malware_reason));
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
243effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  void CheckCache(PreClassificationCheckFailures phishing_reason,
244effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                  PreClassificationCheckFailures malware_reason) {
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
246effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (phishing_reason != NO_CLASSIFY_MAX)
247effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForPhishing(phishing_reason);
248effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (malware_reason != NO_CLASSIFY_MAX)
249effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForMalware(malware_reason);
250effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
251effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      return;  // No point in doing anything else.
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
253effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // If result is cached, we don't want to run classification again.
254effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // In that case we're just trying to show the warning.
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool is_phishing;
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(1) << "Satisfying request for " << params_.url << " from cache";
258effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Since we are already on the UI thread, this is safe.
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      host_->MaybeShowPhishingWarning(params_.url, is_phishing);
261effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We want to limit the number of requests, though we will ignore the
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // limit for urls in the cache.  We don't want to start classifying
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // too many pages as phishing, but for those that we already think are
267effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // phishing we want to send a request to the server to give ourselves
268effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // a chance to fix misclassifications.
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (csd_service_->IsInCache(params_.url)) {
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(1) << "Reporting limit skipped for " << params_.url
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              << " as it was in the cache.";
272effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
27390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    } else if (csd_service_->OverPhishingReportLimit()) {
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VLOG(1) << "Too many report phishing requests sent recently, "
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              << "not running classification for " << params_.url;
276effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
277effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    }
278effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (csd_service_->OverMalwareReportLimit()) {
279effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Everything checks out, so start classification.
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // |web_contents_| is safe to call as we will be destructed
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // before it is.
285c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch    if (ShouldClassifyForPhishing()) {
286effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      start_phishing_classification_cb_.Run(true);
287c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch      // Reset the callback to make sure ShouldClassifyForPhishing()
288c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch      // returns false.
289c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch      start_phishing_classification_cb_.Reset();
290c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch    }
291c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch    if (ShouldClassifyForMalware()) {
292effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      start_malware_classification_cb_.Run(true);
293c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch      // Reset the callback to make sure ShouldClassifyForMalware()
294c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch      // returns false.
295c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch      start_malware_classification_cb_.Reset();
296c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch    }
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  content::FrameNavigateParams params_;
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  WebContents* web_contents_;
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientSideDetectionService* csd_service_;
3022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // We keep a ref pointer here just to make sure the safe browsing
3032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // database manager stays alive long enough.
3042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientSideDetectionHost* host_;
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
307effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  ShouldClassifyUrlCallback start_phishing_classification_cb_;
308effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  ShouldClassifyUrlCallback start_malware_classification_cb_;
309effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ClientSideDetectionHost* ClientSideDetectionHost::Create(
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WebContents* tab) {
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return new ClientSideDetectionHost(tab);
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : content::WebContentsObserver(tab),
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      csd_service_(NULL),
322effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      classification_request_(NULL),
323effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      should_extract_malware_features_(true),
324effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      should_classify_for_malware_(false),
325e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      pageload_complete_(false),
3261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      unsafe_unique_page_id_(-1),
3271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      weak_factory_(this) {
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(tab);
3292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Note: csd_service_ and sb_service will be NULL here in testing.
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  csd_service_ = g_browser_process->safe_browsing_detection_service();
331f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 content::Source<WebContents>(tab));
3342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  scoped_refptr<SafeBrowsingService> sb_service =
3362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      g_browser_process->safe_browsing_service();
337868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  if (sb_service.get()) {
3382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    ui_manager_ = sb_service->ui_manager();
3392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    database_manager_ = sb_service->database_manager();
3402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    ui_manager_->AddObserver(this);
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ClientSideDetectionHost::~ClientSideDetectionHost() {
345868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  if (ui_manager_.get())
3462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    ui_manager_->RemoveObserver(this);
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool handled = true;
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        OnPhishingDetectionDone)
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    IPC_MESSAGE_UNHANDLED(handled = false)
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IPC_END_MESSAGE_MAP()
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return handled;
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ClientSideDetectionHost::DidNavigateMainFrame(
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const content::LoadCommittedDetails& details,
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const content::FrameNavigateParams& params) {
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that don't call this method on the UI thread.
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (details.is_in_page) {
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If the navigation is within the same page, the user isn't really
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // navigating away.  We don't need to cancel a pending callback or
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // begin a new classification.
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
371effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  // Cancel any pending classification request.
372effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (classification_request_.get()) {
373effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    classification_request_->Cancel();
374effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we navigate away and there currently is a pending phishing
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // report request we have to cancel it to make sure we don't display
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // an interstitial for the wrong page.  Note that this won't cancel
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the server ping back but only cancel the showing of the
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // interstial.
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  weak_factory_.InvalidateWeakPtrs();
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!csd_service_) {
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_.reset(new BrowseInfo);
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Store redirect chain information.
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (params.url.host() != cur_host_) {
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur_host_ = params.url.host();
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur_host_redirects_ = params.redirects;
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
392effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  browse_info_->url = params.url;
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->host_redirects = cur_host_redirects_;
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->url_redirects = params.redirects;
395f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  browse_info_->referrer = params.referrer.url;
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  browse_info_->http_status_code = details.http_status_code;
397effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  browse_info_->page_id = params.page_id;
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
399effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  should_extract_malware_features_ = true;
400effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  should_classify_for_malware_ = false;
401e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  pageload_complete_ = false;
402effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
403effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  // Check whether we can cassify the current URL for phishing or malware.
404868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  classification_request_ = new ShouldClassifyUrlRequest(
405effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      params,
406effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
407effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                 weak_factory_.GetWeakPtr()),
408effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
409effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                 weak_factory_.GetWeakPtr()),
410effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      web_contents(), csd_service_, database_manager_.get(), this);
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  classification_request_->Start();
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ClientSideDetectionHost::OnSafeBrowsingHit(
4152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const SafeBrowsingUIManager::UnsafeResource& resource) {
416a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
417a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    return;
418a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
419a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  // Check that the hit is either malware or phishing.
420a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
421a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
422a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    return;
423a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
424a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  // Check that this notification is really for us.
425a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
426a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      resource.render_process_host_id, resource.render_view_id);
427a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  if (!hit_rvh ||
428a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
429a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    return;
430a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
431a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  // Store the unique page ID for later.
432a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  unsafe_unique_page_id_ =
433a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      web_contents()->GetController().GetActiveEntry()->GetUniqueID();
4345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
435a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  // We also keep the resource around in order to be able to send the
436a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  // malicious URL to the server.
437a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
438a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  unsafe_resource_->callback.Reset();  // Don't do anything stupid.
439a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}
440a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
441a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void ClientSideDetectionHost::OnSafeBrowsingMatch(
442a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    const SafeBrowsingUIManager::UnsafeResource& resource) {
4435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
4445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    return;
4455d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
4465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  // Check that this notification is really for us.
4475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
4485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      resource.render_process_host_id, resource.render_view_id);
4495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!hit_rvh ||
4505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
4515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    return;
4525d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
4535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  web_contents()->GetController().GetActiveEntry()->SetExtraData(
4545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
457f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)scoped_refptr<SafeBrowsingDatabaseManager>
458f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)ClientSideDetectionHost::database_manager() {
459a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  return database_manager_;
460a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}
461a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
462a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
4635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
4645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    return false;
4655d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
4665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  // If an interstitial page is showing, GetVisibleEntry will return the
4675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  // transient NavigationEntry for the interstitial. The transient entry
4685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  // will not have the flag set, so use the pending entry instead if there
4695d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  // is one.
4705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
4715d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!entry) {
4725d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    entry = web_contents()->GetController().GetVisibleEntry();
4735d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
4745d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      entry = web_contents()->GetController().GetLastCommittedEntry();
4755d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    if (!entry)
4765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      return false;
4775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  }
4785d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
4795d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  base::string16 value;
4805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
481f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
482f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
483010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)void ClientSideDetectionHost::WebContentsDestroyed() {
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Tell any pending classification request that it is being canceled.
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (classification_request_.get()) {
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    classification_request_->Cancel();
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Cancel all pending feature extractions.
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  feature_extractor_.reset();
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
492effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid ClientSideDetectionHost::OnPhishingPreClassificationDone(
493effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    bool should_classify) {
494effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
495effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (browse_info_.get() && should_classify) {
496effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    VLOG(1) << "Instruct renderer to start phishing detection for URL: "
497effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch            << browse_info_->url;
498effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
499effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
500effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        rvh->GetRoutingID(), browse_info_->url));
501effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
502effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch}
503effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
504effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid ClientSideDetectionHost::OnMalwarePreClassificationDone(
505effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    bool should_classify) {
506effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
507effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  // If classification checks failed we should stop extracting malware features.
508effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DVLOG(2) << "Malware pre-classification checks done. Should classify: "
509effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch           << should_classify;
510effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  should_extract_malware_features_ = should_classify;
511effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  should_classify_for_malware_ = should_classify;
512effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  MaybeStartMalwareFeatureExtraction();
513effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch}
514effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
515e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdochvoid ClientSideDetectionHost::DidStopLoading(content::RenderViewHost* rvh) {
516effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
517effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (!csd_service_ || !browse_info_.get())
518effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    return;
519e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  DVLOG(2) << "Page finished loading.";
520e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch  pageload_complete_ = true;
521effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  MaybeStartMalwareFeatureExtraction();
522effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch}
523effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
524effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
525effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
526effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (csd_service_ && browse_info_.get() &&
527effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      should_classify_for_malware_ &&
528e5d81f57cb97b3b6b7fccc9c5610d21eb81db09dBen Murdoch      pageload_complete_) {
529effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    scoped_ptr<ClientMalwareRequest> malware_request(
530effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        new ClientMalwareRequest);
531effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // Start browser-side malware feature extraction.  Once we're done it will
532effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // send the malware client verdict request.
533effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    malware_request->set_url(browse_info_->url.spec());
534effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    const GURL& referrer = browse_info_->referrer;
535effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (referrer.SchemeIs("http")) {  // Only send http urls.
536effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      malware_request->set_referrer_url(referrer.spec());
537effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    }
538effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // This function doesn't expect browse_info_ to stay around after this
539effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    // function returns.
540effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    feature_extractor_->ExtractMalwareFeatures(
541effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        browse_info_.get(),
542effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        malware_request.release(),
543effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch        base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
544effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   weak_factory_.GetWeakPtr()));
545effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    should_classify_for_malware_ = false;
546effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
547effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch}
548effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ClientSideDetectionHost::OnPhishingDetectionDone(
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& verdict_str) {
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There is something seriously wrong if there is no service class but
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // this method is called.  The renderer should not start phishing detection
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // if there isn't any service class in the browser.
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(csd_service_);
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(browse_info_.get());
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We parse the protocol buffer here.  If we're unable to parse it we won't
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // send the verdict further.
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (csd_service_ &&
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      browse_info_.get() &&
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      verdict->ParseFromString(verdict_str) &&
5642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      verdict->IsInitialized()) {
5652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // We only send phishing verdict to the server if the verdict is phishing or
5662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
5672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // malware or phishing interstitial was shown but the user clicked
5682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // through.
5692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (verdict->is_phishing() || DidShowSBInterstitial()) {
5702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (DidShowSBInterstitial()) {
5712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        browse_info_->unsafe_resource.reset(unsafe_resource_.release());
5722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      }
5732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // Start browser-side feature extraction.  Once we're done it will send
5742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // the client verdict request.
5752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      feature_extractor_->ExtractFeatures(
5762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          browse_info_.get(),
5772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          verdict.release(),
5782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
5792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                     weak_factory_.GetWeakPtr()));
5802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                       bool is_phishing) {
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
587effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
588effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch           << " is_phishing:" << is_phishing;
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (is_phishing) {
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(web_contents());
591868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    if (ui_manager_.get()) {
5922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SafeBrowsingUIManager::UnsafeResource resource;
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource.url = phishing_url;
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource.original_url = phishing_url;
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource.is_subresource = false;
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource.render_process_host_id =
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          web_contents()->GetRenderProcessHost()->GetID();
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      resource.render_view_id =
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          web_contents()->GetRenderViewHost()->GetRoutingID();
6012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (!ui_manager_->IsWhitelisted(resource)) {
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We need to stop any pending navigations, otherwise the interstital
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // might not get created properly.
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        web_contents()->GetController().DiscardNonCommittedEntries();
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6065d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      ui_manager_->DisplayBlockingPage(resource);
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
608424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    // If there is true phishing verdict, invalidate weakptr so that no longer
609424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    // consider the malware vedict.
610424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    weak_factory_.InvalidateWeakPtrs();
611424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)  }
612424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)}
613424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)
614424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
615424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)                                                      GURL malware_url,
616424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)                                                      bool is_malware) {
617424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
618effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
619effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch           << " is_malware:" << is_malware;
620424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)  if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
621424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    DCHECK(web_contents());
622424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    if (ui_manager_.get()) {
623424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      SafeBrowsingUIManager::UnsafeResource resource;
624424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      resource.url = malware_url;
625424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      resource.original_url = original_url;
626424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      resource.is_subresource = (malware_url.host() != original_url.host());
627424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
628424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      resource.render_process_host_id =
629424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)          web_contents()->GetRenderProcessHost()->GetID();
630424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      resource.render_view_id =
631424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)          web_contents()->GetRenderViewHost()->GetRoutingID();
632424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      if (!ui_manager_->IsWhitelisted(resource)) {
633424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)        // We need to stop any pending navigations, otherwise the interstitial
634424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)        // might not get created properly.
635424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)        web_contents()->GetController().DiscardNonCommittedEntries();
636424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)      }
6375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      ui_manager_->DisplayBlockingPage(resource);
638424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    }
639424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    // If there is a true malware verdict, invalidate the weak pointers so that
640424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    // we no longer consider the phishing verdict.
641424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    weak_factory_.InvalidateWeakPtrs();
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ClientSideDetectionHost::FeatureExtractionDone(
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool success,
647116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    scoped_ptr<ClientPhishingRequest> request) {
648f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  DCHECK(request);
649effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
650effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch           << request->url() << ". Start sending client phishing request.";
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If the client-side verdict isn't phishing we don't care about the server
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // response because we aren't going to display a warning.
6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (request->is_phishing()) {
6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          weak_factory_.GetWeakPtr());
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Send ping even if the browser feature extraction failed.
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  csd_service_->SendClientReportPhishingRequest(
660116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      request.release(),  // The service takes ownership of the request object.
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      callback);
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void ClientSideDetectionHost::MalwareFeatureExtractionDone(
665f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    bool feature_extraction_success,
6662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    scoped_ptr<ClientMalwareRequest> request) {
667f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  DCHECK(request.get());
668effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
669effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch           << ", with badip url count:" << request->bad_ip_url_info_size();
6702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Send ping if there is matching features.
672a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
6732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    VLOG(1) << "Start sending client malware request.";
6742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
675424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)    callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
676424c4d7b64af9d0d8fd9624f381f469654d5e3d2Torne (Richard Coles)                          weak_factory_.GetWeakPtr());
677f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    csd_service_->SendClientReportMalwareRequest(request.release(), callback);
6782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
6802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6815f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
6825f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                                             const std::string& url,
6835f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                                             const std::string& method,
6845f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                                             const std::string& referrer,
6855f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                                             const ResourceType resource_type) {
686a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  if (ip.empty() || url.empty())
6872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return;
6882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
689a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  IPUrlMap::iterator it = browse_info_->ips.find(ip);
6902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (it == browse_info_->ips.end()) {
69146d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)    if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
692a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      std::vector<IPUrlInfo> url_infos;
693a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
694a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)      browse_info_->ips.insert(make_pair(ip, url_infos));
6952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
69646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)  } else if (it->second.size() < kMaxUrlsPerIP) {
697a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
6982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
7002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ClientSideDetectionHost::Observe(
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int type,
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const content::NotificationSource& source,
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const content::NotificationDetails& details) {
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      details).ptr();
709effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (req && browse_info_.get() &&
710effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      should_extract_malware_features_ && req->url.is_valid()) {
711effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    UpdateIPUrlMap(req->socket_address.host() /* ip */,
712effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   req->url.spec()  /* url */,
713effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   req->method,
714effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   req->referrer,
715effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                   req->resource_type);
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
719a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool ClientSideDetectionHost::DidShowSBInterstitial() const {
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const NavigationEntry* nav_entry =
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      web_contents()->GetController().GetActiveEntry();
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ClientSideDetectionHost::set_client_side_detection_service(
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ClientSideDetectionService* service) {
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  csd_service_ = service;
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void ClientSideDetectionHost::set_safe_browsing_managers(
7342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SafeBrowsingUIManager* ui_manager,
7352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SafeBrowsingDatabaseManager* database_manager) {
736868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  if (ui_manager_.get())
7372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    ui_manager_->RemoveObserver(this);
7382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
7392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ui_manager_ = ui_manager;
7402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (ui_manager)
7412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    ui_manager_->AddObserver(this);
7422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
7432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  database_manager_ = database_manager;
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace safe_browsing
747