1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/client_side_detection_host.h"
6
7#include <vector>
8
9#include "base/logging.h"
10#include "base/memory/ref_counted.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/metrics/histogram.h"
13#include "base/prefs/pref_service.h"
14#include "base/sequenced_task_runner_helpers.h"
15#include "base/strings/utf_string_conversions.h"
16#include "chrome/browser/browser_process.h"
17#include "chrome/browser/profiles/profile.h"
18#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
19#include "chrome/browser/safe_browsing/client_side_detection_service.h"
20#include "chrome/browser/safe_browsing/database_manager.h"
21#include "chrome/browser/safe_browsing/safe_browsing_service.h"
22#include "chrome/common/pref_names.h"
23#include "chrome/common/safe_browsing/csd.pb.h"
24#include "chrome/common/safe_browsing/safebrowsing_messages.h"
25#include "content/public/browser/browser_thread.h"
26#include "content/public/browser/navigation_controller.h"
27#include "content/public/browser/navigation_details.h"
28#include "content/public/browser/navigation_entry.h"
29#include "content/public/browser/notification_details.h"
30#include "content/public/browser/notification_source.h"
31#include "content/public/browser/notification_types.h"
32#include "content/public/browser/render_process_host.h"
33#include "content/public/browser/render_view_host.h"
34#include "content/public/browser/resource_request_details.h"
35#include "content/public/browser/web_contents.h"
36#include "content/public/common/frame_navigate_params.h"
37#include "content/public/common/url_constants.h"
38#include "url/gurl.h"
39
40using content::BrowserThread;
41using content::NavigationEntry;
42using content::ResourceRequestDetails;
43using content::ResourceType;
44using content::WebContents;
45
46namespace safe_browsing {
47
48const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
49const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
50
51const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
52
53typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
54
55// This class is instantiated each time a new toplevel URL loads, and
56// asynchronously checks whether the malware and phishing classifiers should run
57// for this URL.  If so, it notifies the host class by calling the provided
58// callback form the UI thread.  Objects of this class are ref-counted and will
59// be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
60// or |host| go away you need to call Cancel().  We keep the |database_manager|
61// alive in a ref pointer for as long as it takes.
62class ClientSideDetectionHost::ShouldClassifyUrlRequest
63    : public base::RefCountedThreadSafe<
64          ClientSideDetectionHost::ShouldClassifyUrlRequest> {
65 public:
66  ShouldClassifyUrlRequest(
67      const content::FrameNavigateParams& params,
68      const ShouldClassifyUrlCallback& start_phishing_classification,
69      const ShouldClassifyUrlCallback& start_malware_classification,
70      WebContents* web_contents,
71      ClientSideDetectionService* csd_service,
72      SafeBrowsingDatabaseManager* database_manager,
73      ClientSideDetectionHost* host)
74      : params_(params),
75        web_contents_(web_contents),
76        csd_service_(csd_service),
77        database_manager_(database_manager),
78        host_(host),
79        start_phishing_classification_cb_(start_phishing_classification),
80        start_malware_classification_cb_(start_malware_classification) {
81    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
82    DCHECK(web_contents_);
83    DCHECK(csd_service_);
84    DCHECK(database_manager_.get());
85    DCHECK(host_);
86  }
87
88  void Start() {
89    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
90
91    // We start by doing some simple checks that can run on the UI thread.
92    UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
93    UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
94
95    // Only classify [X]HTML documents.
96    if (params_.contents_mime_type != "text/html" &&
97        params_.contents_mime_type != "application/xhtml+xml") {
98      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
99              << " because it has an unsupported MIME type: "
100              << params_.contents_mime_type;
101      DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
102    }
103
104    if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
105      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
106              << " because of hosting on private IP: "
107              << params_.socket_address.host();
108      DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
109      DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
110    }
111
112    // For phishing we only classify HTTP pages.
113    if (!params_.url.SchemeIs(url::kHttpScheme)) {
114      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
115              << " because it is not HTTP: "
116              << params_.socket_address.host();
117      DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
118    }
119
120    // Don't run any classifier if the tab is incognito.
121    if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
122      VLOG(1) << "Skipping phishing and malware classification for URL: "
123              << params_.url << " because we're browsing incognito.";
124      DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
125      DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
126    }
127
128    // We lookup the csd-whitelist before we lookup the cache because
129    // a URL may have recently been whitelisted.  If the URL matches
130    // the csd-whitelist we won't start phishing classification.  The
131    // csd-whitelist check has to be done on the IO thread because it
132    // uses the SafeBrowsing service class.
133    if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
134      BrowserThread::PostTask(
135          BrowserThread::IO,
136          FROM_HERE,
137          base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
138                     this, params_.url));
139    }
140  }
141
142  void Cancel() {
143    DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
144    DontClassifyForMalware(NO_CLASSIFY_CANCEL);
145    // Just to make sure we don't do anything stupid we reset all these
146    // pointers except for the safebrowsing service class which may be
147    // accessed by CheckSafeBrowsingDatabase().
148    web_contents_ = NULL;
149    csd_service_ = NULL;
150    host_ = NULL;
151  }
152
153 private:
154  friend class base::RefCountedThreadSafe<
155      ClientSideDetectionHost::ShouldClassifyUrlRequest>;
156
157  // Enum used to keep stats about why the pre-classification check failed.
158  enum PreClassificationCheckFailures {
159    OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
160    NO_CLASSIFY_PRIVATE_IP,
161    NO_CLASSIFY_OFF_THE_RECORD,
162    NO_CLASSIFY_MATCH_CSD_WHITELIST,
163    NO_CLASSIFY_TOO_MANY_REPORTS,
164    NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
165    NO_CLASSIFY_NO_DATABASE_MANAGER,
166    NO_CLASSIFY_KILLSWITCH,
167    NO_CLASSIFY_CANCEL,
168    NO_CLASSIFY_RESULT_FROM_CACHE,
169    NO_CLASSIFY_NOT_HTTP_URL,
170
171    NO_CLASSIFY_MAX  // Always add new values before this one.
172  };
173
174  // The destructor can be called either from the UI or the IO thread.
175  virtual ~ShouldClassifyUrlRequest() { }
176
177  bool ShouldClassifyForPhishing() const {
178    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
179    return !start_phishing_classification_cb_.is_null();
180  }
181
182  bool ShouldClassifyForMalware() const {
183    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
184    return !start_malware_classification_cb_.is_null();
185  }
186
187  void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
188    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
189    if (ShouldClassifyForPhishing()) {
190      // Track the first reason why we stopped classifying for phishing.
191      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
192                                reason, NO_CLASSIFY_MAX);
193      DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
194               << reason;
195      start_phishing_classification_cb_.Run(false);
196    }
197    start_phishing_classification_cb_.Reset();
198  }
199
200  void DontClassifyForMalware(PreClassificationCheckFailures reason) {
201    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
202    if (ShouldClassifyForMalware()) {
203      // Track the first reason why we stopped classifying for malware.
204      UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
205                                reason, NO_CLASSIFY_MAX);
206      DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
207               << reason;
208      start_malware_classification_cb_.Run(false);
209    }
210    start_malware_classification_cb_.Reset();
211  }
212
213  void CheckSafeBrowsingDatabase(const GURL& url) {
214    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
215    // We don't want to call the classification callbacks from the IO
216    // thread so we simply pass the results of this method to CheckCache()
217    // which is called on the UI thread;
218    PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
219    PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
220    if (!database_manager_.get()) {
221      // We cannot check the Safe Browsing whitelists so we stop here
222      // for safety.
223      malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
224    } else {
225      if (database_manager_->MatchCsdWhitelistUrl(url)) {
226        VLOG(1) << "Skipping phishing classification for URL: " << url
227                << " because it matches the csd whitelist";
228        phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
229      }
230      if (database_manager_->IsMalwareKillSwitchOn()) {
231        malware_reason = NO_CLASSIFY_KILLSWITCH;
232      }
233    }
234    BrowserThread::PostTask(
235        BrowserThread::UI,
236        FROM_HERE,
237        base::Bind(&ShouldClassifyUrlRequest::CheckCache,
238                   this,
239                   phishing_reason,
240                   malware_reason));
241  }
242
243  void CheckCache(PreClassificationCheckFailures phishing_reason,
244                  PreClassificationCheckFailures malware_reason) {
245    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
246    if (phishing_reason != NO_CLASSIFY_MAX)
247      DontClassifyForPhishing(phishing_reason);
248    if (malware_reason != NO_CLASSIFY_MAX)
249      DontClassifyForMalware(malware_reason);
250    if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
251      return;  // No point in doing anything else.
252    }
253    // If result is cached, we don't want to run classification again.
254    // In that case we're just trying to show the warning.
255    bool is_phishing;
256    if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
257      VLOG(1) << "Satisfying request for " << params_.url << " from cache";
258      UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
259      // Since we are already on the UI thread, this is safe.
260      host_->MaybeShowPhishingWarning(params_.url, is_phishing);
261      DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
262    }
263
264    // We want to limit the number of requests, though we will ignore the
265    // limit for urls in the cache.  We don't want to start classifying
266    // too many pages as phishing, but for those that we already think are
267    // phishing we want to send a request to the server to give ourselves
268    // a chance to fix misclassifications.
269    if (csd_service_->IsInCache(params_.url)) {
270      VLOG(1) << "Reporting limit skipped for " << params_.url
271              << " as it was in the cache.";
272      UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
273    } else if (csd_service_->OverPhishingReportLimit()) {
274      VLOG(1) << "Too many report phishing requests sent recently, "
275              << "not running classification for " << params_.url;
276      DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
277    }
278    if (csd_service_->OverMalwareReportLimit()) {
279      DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
280    }
281
282    // Everything checks out, so start classification.
283    // |web_contents_| is safe to call as we will be destructed
284    // before it is.
285    if (ShouldClassifyForPhishing()) {
286      start_phishing_classification_cb_.Run(true);
287      // Reset the callback to make sure ShouldClassifyForPhishing()
288      // returns false.
289      start_phishing_classification_cb_.Reset();
290    }
291    if (ShouldClassifyForMalware()) {
292      start_malware_classification_cb_.Run(true);
293      // Reset the callback to make sure ShouldClassifyForMalware()
294      // returns false.
295      start_malware_classification_cb_.Reset();
296    }
297  }
298
299  content::FrameNavigateParams params_;
300  WebContents* web_contents_;
301  ClientSideDetectionService* csd_service_;
302  // We keep a ref pointer here just to make sure the safe browsing
303  // database manager stays alive long enough.
304  scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
305  ClientSideDetectionHost* host_;
306
307  ShouldClassifyUrlCallback start_phishing_classification_cb_;
308  ShouldClassifyUrlCallback start_malware_classification_cb_;
309
310  DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
311};
312
313// static
314ClientSideDetectionHost* ClientSideDetectionHost::Create(
315    WebContents* tab) {
316  return new ClientSideDetectionHost(tab);
317}
318
319ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
320    : content::WebContentsObserver(tab),
321      csd_service_(NULL),
322      classification_request_(NULL),
323      should_extract_malware_features_(true),
324      should_classify_for_malware_(false),
325      pageload_complete_(false),
326      unsafe_unique_page_id_(-1),
327      weak_factory_(this) {
328  DCHECK(tab);
329  // Note: csd_service_ and sb_service will be NULL here in testing.
330  csd_service_ = g_browser_process->safe_browsing_detection_service();
331  feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
332  registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
333                 content::Source<WebContents>(tab));
334
335  scoped_refptr<SafeBrowsingService> sb_service =
336      g_browser_process->safe_browsing_service();
337  if (sb_service.get()) {
338    ui_manager_ = sb_service->ui_manager();
339    database_manager_ = sb_service->database_manager();
340    ui_manager_->AddObserver(this);
341  }
342}
343
344ClientSideDetectionHost::~ClientSideDetectionHost() {
345  if (ui_manager_.get())
346    ui_manager_->RemoveObserver(this);
347}
348
349bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
350  bool handled = true;
351  IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
352    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
353                        OnPhishingDetectionDone)
354    IPC_MESSAGE_UNHANDLED(handled = false)
355  IPC_END_MESSAGE_MAP()
356  return handled;
357}
358
359void ClientSideDetectionHost::DidNavigateMainFrame(
360    const content::LoadCommittedDetails& details,
361    const content::FrameNavigateParams& params) {
362  // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
363  // that don't call this method on the UI thread.
364  // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
365  if (details.is_in_page) {
366    // If the navigation is within the same page, the user isn't really
367    // navigating away.  We don't need to cancel a pending callback or
368    // begin a new classification.
369    return;
370  }
371  // Cancel any pending classification request.
372  if (classification_request_.get()) {
373    classification_request_->Cancel();
374  }
375  // If we navigate away and there currently is a pending phishing
376  // report request we have to cancel it to make sure we don't display
377  // an interstitial for the wrong page.  Note that this won't cancel
378  // the server ping back but only cancel the showing of the
379  // interstial.
380  weak_factory_.InvalidateWeakPtrs();
381
382  if (!csd_service_) {
383    return;
384  }
385  browse_info_.reset(new BrowseInfo);
386
387  // Store redirect chain information.
388  if (params.url.host() != cur_host_) {
389    cur_host_ = params.url.host();
390    cur_host_redirects_ = params.redirects;
391  }
392  browse_info_->url = params.url;
393  browse_info_->host_redirects = cur_host_redirects_;
394  browse_info_->url_redirects = params.redirects;
395  browse_info_->referrer = params.referrer.url;
396  browse_info_->http_status_code = details.http_status_code;
397  browse_info_->page_id = params.page_id;
398
399  should_extract_malware_features_ = true;
400  should_classify_for_malware_ = false;
401  pageload_complete_ = false;
402
403  // Check whether we can cassify the current URL for phishing or malware.
404  classification_request_ = new ShouldClassifyUrlRequest(
405      params,
406      base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
407                 weak_factory_.GetWeakPtr()),
408      base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
409                 weak_factory_.GetWeakPtr()),
410      web_contents(), csd_service_, database_manager_.get(), this);
411  classification_request_->Start();
412}
413
414void ClientSideDetectionHost::OnSafeBrowsingHit(
415    const SafeBrowsingUIManager::UnsafeResource& resource) {
416  if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
417    return;
418
419  // Check that the hit is either malware or phishing.
420  if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
421      resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
422    return;
423
424  // Check that this notification is really for us.
425  content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
426      resource.render_process_host_id, resource.render_view_id);
427  if (!hit_rvh ||
428      web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
429    return;
430
431  // Store the unique page ID for later.
432  unsafe_unique_page_id_ =
433      web_contents()->GetController().GetActiveEntry()->GetUniqueID();
434
435  // We also keep the resource around in order to be able to send the
436  // malicious URL to the server.
437  unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
438  unsafe_resource_->callback.Reset();  // Don't do anything stupid.
439}
440
441void ClientSideDetectionHost::OnSafeBrowsingMatch(
442    const SafeBrowsingUIManager::UnsafeResource& resource) {
443  if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
444    return;
445
446  // Check that this notification is really for us.
447  content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
448      resource.render_process_host_id, resource.render_view_id);
449  if (!hit_rvh ||
450      web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
451    return;
452
453  web_contents()->GetController().GetActiveEntry()->SetExtraData(
454      kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
455}
456
457scoped_refptr<SafeBrowsingDatabaseManager>
458ClientSideDetectionHost::database_manager() {
459  return database_manager_;
460}
461
462bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
463  if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
464    return false;
465
466  // If an interstitial page is showing, GetVisibleEntry will return the
467  // transient NavigationEntry for the interstitial. The transient entry
468  // will not have the flag set, so use the pending entry instead if there
469  // is one.
470  NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
471  if (!entry) {
472    entry = web_contents()->GetController().GetVisibleEntry();
473    if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
474      entry = web_contents()->GetController().GetLastCommittedEntry();
475    if (!entry)
476      return false;
477  }
478
479  base::string16 value;
480  return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
481}
482
483void ClientSideDetectionHost::WebContentsDestroyed() {
484  // Tell any pending classification request that it is being canceled.
485  if (classification_request_.get()) {
486    classification_request_->Cancel();
487  }
488  // Cancel all pending feature extractions.
489  feature_extractor_.reset();
490}
491
492void ClientSideDetectionHost::OnPhishingPreClassificationDone(
493    bool should_classify) {
494  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
495  if (browse_info_.get() && should_classify) {
496    VLOG(1) << "Instruct renderer to start phishing detection for URL: "
497            << browse_info_->url;
498    content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
499    rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
500        rvh->GetRoutingID(), browse_info_->url));
501  }
502}
503
504void ClientSideDetectionHost::OnMalwarePreClassificationDone(
505    bool should_classify) {
506  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
507  // If classification checks failed we should stop extracting malware features.
508  DVLOG(2) << "Malware pre-classification checks done. Should classify: "
509           << should_classify;
510  should_extract_malware_features_ = should_classify;
511  should_classify_for_malware_ = should_classify;
512  MaybeStartMalwareFeatureExtraction();
513}
514
515void ClientSideDetectionHost::DidStopLoading(content::RenderViewHost* rvh) {
516  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
517  if (!csd_service_ || !browse_info_.get())
518    return;
519  DVLOG(2) << "Page finished loading.";
520  pageload_complete_ = true;
521  MaybeStartMalwareFeatureExtraction();
522}
523
524void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
525  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
526  if (csd_service_ && browse_info_.get() &&
527      should_classify_for_malware_ &&
528      pageload_complete_) {
529    scoped_ptr<ClientMalwareRequest> malware_request(
530        new ClientMalwareRequest);
531    // Start browser-side malware feature extraction.  Once we're done it will
532    // send the malware client verdict request.
533    malware_request->set_url(browse_info_->url.spec());
534    const GURL& referrer = browse_info_->referrer;
535    if (referrer.SchemeIs("http")) {  // Only send http urls.
536      malware_request->set_referrer_url(referrer.spec());
537    }
538    // This function doesn't expect browse_info_ to stay around after this
539    // function returns.
540    feature_extractor_->ExtractMalwareFeatures(
541        browse_info_.get(),
542        malware_request.release(),
543        base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
544                   weak_factory_.GetWeakPtr()));
545    should_classify_for_malware_ = false;
546  }
547}
548
549void ClientSideDetectionHost::OnPhishingDetectionDone(
550    const std::string& verdict_str) {
551  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
552  // There is something seriously wrong if there is no service class but
553  // this method is called.  The renderer should not start phishing detection
554  // if there isn't any service class in the browser.
555  DCHECK(csd_service_);
556  DCHECK(browse_info_.get());
557
558  // We parse the protocol buffer here.  If we're unable to parse it we won't
559  // send the verdict further.
560  scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
561  if (csd_service_ &&
562      browse_info_.get() &&
563      verdict->ParseFromString(verdict_str) &&
564      verdict->IsInitialized()) {
565    // We only send phishing verdict to the server if the verdict is phishing or
566    // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
567    // malware or phishing interstitial was shown but the user clicked
568    // through.
569    if (verdict->is_phishing() || DidShowSBInterstitial()) {
570      if (DidShowSBInterstitial()) {
571        browse_info_->unsafe_resource.reset(unsafe_resource_.release());
572      }
573      // Start browser-side feature extraction.  Once we're done it will send
574      // the client verdict request.
575      feature_extractor_->ExtractFeatures(
576          browse_info_.get(),
577          verdict.release(),
578          base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
579                     weak_factory_.GetWeakPtr()));
580    }
581  }
582}
583
584void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
585                                                       bool is_phishing) {
586  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
587  DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
588           << " is_phishing:" << is_phishing;
589  if (is_phishing) {
590    DCHECK(web_contents());
591    if (ui_manager_.get()) {
592      SafeBrowsingUIManager::UnsafeResource resource;
593      resource.url = phishing_url;
594      resource.original_url = phishing_url;
595      resource.is_subresource = false;
596      resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
597      resource.render_process_host_id =
598          web_contents()->GetRenderProcessHost()->GetID();
599      resource.render_view_id =
600          web_contents()->GetRenderViewHost()->GetRoutingID();
601      if (!ui_manager_->IsWhitelisted(resource)) {
602        // We need to stop any pending navigations, otherwise the interstital
603        // might not get created properly.
604        web_contents()->GetController().DiscardNonCommittedEntries();
605      }
606      ui_manager_->DisplayBlockingPage(resource);
607    }
608    // If there is true phishing verdict, invalidate weakptr so that no longer
609    // consider the malware vedict.
610    weak_factory_.InvalidateWeakPtrs();
611  }
612}
613
614void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
615                                                      GURL malware_url,
616                                                      bool is_malware) {
617  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
618  DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
619           << " is_malware:" << is_malware;
620  if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
621    DCHECK(web_contents());
622    if (ui_manager_.get()) {
623      SafeBrowsingUIManager::UnsafeResource resource;
624      resource.url = malware_url;
625      resource.original_url = original_url;
626      resource.is_subresource = (malware_url.host() != original_url.host());
627      resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
628      resource.render_process_host_id =
629          web_contents()->GetRenderProcessHost()->GetID();
630      resource.render_view_id =
631          web_contents()->GetRenderViewHost()->GetRoutingID();
632      if (!ui_manager_->IsWhitelisted(resource)) {
633        // We need to stop any pending navigations, otherwise the interstital
634        // might not get created properly.
635        web_contents()->GetController().DiscardNonCommittedEntries();
636      }
637      ui_manager_->DisplayBlockingPage(resource);
638    }
639    // If there is true malware verdict, invalidate weakptr so that no longer
640    // consider the phishing vedict.
641    weak_factory_.InvalidateWeakPtrs();
642  }
643}
644
645void ClientSideDetectionHost::FeatureExtractionDone(
646    bool success,
647    scoped_ptr<ClientPhishingRequest> request) {
648  DCHECK(request);
649  DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
650           << request->url() << ". Start sending client phishing request.";
651  ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
652  // If the client-side verdict isn't phishing we don't care about the server
653  // response because we aren't going to display a warning.
654  if (request->is_phishing()) {
655    callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
656                          weak_factory_.GetWeakPtr());
657  }
658  // Send ping even if the browser feature extraction failed.
659  csd_service_->SendClientReportPhishingRequest(
660      request.release(),  // The service takes ownership of the request object.
661      callback);
662}
663
664void ClientSideDetectionHost::MalwareFeatureExtractionDone(
665    bool feature_extraction_success,
666    scoped_ptr<ClientMalwareRequest> request) {
667  DCHECK(request.get());
668  DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
669           << ", with badip url count:" << request->bad_ip_url_info_size();
670
671  // Send ping if there is matching features.
672  if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
673    VLOG(1) << "Start sending client malware request.";
674    ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
675    callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
676                          weak_factory_.GetWeakPtr());
677    csd_service_->SendClientReportMalwareRequest(request.release(), callback);
678  }
679}
680
681void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
682                                             const std::string& url,
683                                             const std::string& method,
684                                             const std::string& referrer,
685                                             const ResourceType resource_type) {
686  if (ip.empty() || url.empty())
687    return;
688
689  IPUrlMap::iterator it = browse_info_->ips.find(ip);
690  if (it == browse_info_->ips.end()) {
691    if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
692      std::vector<IPUrlInfo> url_infos;
693      url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
694      browse_info_->ips.insert(make_pair(ip, url_infos));
695    }
696  } else if (it->second.size() < kMaxUrlsPerIP) {
697    it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
698  }
699}
700
701void ClientSideDetectionHost::Observe(
702    int type,
703    const content::NotificationSource& source,
704    const content::NotificationDetails& details) {
705  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
706  DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
707  const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
708      details).ptr();
709  if (req && browse_info_.get() &&
710      should_extract_malware_features_ && req->url.is_valid()) {
711    UpdateIPUrlMap(req->socket_address.host() /* ip */,
712                   req->url.spec()  /* url */,
713                   req->method,
714                   req->referrer,
715                   req->resource_type);
716  }
717}
718
719bool ClientSideDetectionHost::DidShowSBInterstitial() const {
720  if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
721    return false;
722  }
723  const NavigationEntry* nav_entry =
724      web_contents()->GetController().GetActiveEntry();
725  return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
726}
727
728void ClientSideDetectionHost::set_client_side_detection_service(
729    ClientSideDetectionService* service) {
730  csd_service_ = service;
731}
732
733void ClientSideDetectionHost::set_safe_browsing_managers(
734    SafeBrowsingUIManager* ui_manager,
735    SafeBrowsingDatabaseManager* database_manager) {
736  if (ui_manager_.get())
737    ui_manager_->RemoveObserver(this);
738
739  ui_manager_ = ui_manager;
740  if (ui_manager)
741    ui_manager_->AddObserver(this);
742
743  database_manager_ = database_manager;
744}
745
746}  // namespace safe_browsing
747