client_side_detection_host.cc revision 868fa2fe829687343ffae624259930155e16dbd8
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/client_side_detection_host.h"
6
7#include <vector>
8
9#include "base/logging.h"
10#include "base/memory/ref_counted.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/metrics/histogram.h"
13#include "base/prefs/pref_service.h"
14#include "base/sequenced_task_runner_helpers.h"
15#include "chrome/browser/browser_process.h"
16#include "chrome/browser/profiles/profile.h"
17#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
18#include "chrome/browser/safe_browsing/client_side_detection_service.h"
19#include "chrome/browser/safe_browsing/database_manager.h"
20#include "chrome/browser/safe_browsing/safe_browsing_service.h"
21#include "chrome/common/chrome_switches.h"
22#include "chrome/common/chrome_version_info.h"
23#include "chrome/common/pref_names.h"
24#include "chrome/common/safe_browsing/csd.pb.h"
25#include "chrome/common/safe_browsing/safebrowsing_messages.h"
26#include "content/public/browser/browser_thread.h"
27#include "content/public/browser/navigation_controller.h"
28#include "content/public/browser/navigation_details.h"
29#include "content/public/browser/navigation_entry.h"
30#include "content/public/browser/notification_details.h"
31#include "content/public/browser/notification_source.h"
32#include "content/public/browser/notification_types.h"
33#include "content/public/browser/render_process_host.h"
34#include "content/public/browser/render_view_host.h"
35#include "content/public/browser/resource_request_details.h"
36#include "content/public/browser/web_contents.h"
37#include "content/public/common/frame_navigate_params.h"
38#include "googleurl/src/gurl.h"
39
40using content::BrowserThread;
41using content::NavigationEntry;
42using content::ResourceRequestDetails;
43using content::WebContents;
44
45namespace safe_browsing {
46
47const int ClientSideDetectionHost::kMaxHostsPerIP = 20;
48const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
49
50namespace {
51
52void EmptyUrlCheckCallback(bool processed) {
53  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
54}
55
56}  // namespace
57
58// This class is instantiated each time a new toplevel URL loads, and
59// asynchronously checks whether the phishing classifier should run for this
60// URL.  If so, it notifies the renderer with a StartPhishingDetection IPC.
61// Objects of this class are ref-counted and will be destroyed once nobody
62// uses it anymore.  If |web_contents|, |csd_service| or |host| go away you need
63// to call Cancel().  We keep the |database_manager| alive in a ref pointer for
64// as long as it takes.
65class ClientSideDetectionHost::ShouldClassifyUrlRequest
66    : public base::RefCountedThreadSafe<
67          ClientSideDetectionHost::ShouldClassifyUrlRequest> {
68 public:
69  ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,
70                           WebContents* web_contents,
71                           ClientSideDetectionService* csd_service,
72                           SafeBrowsingDatabaseManager* database_manager,
73                           ClientSideDetectionHost* host)
74      : canceled_(false),
75        params_(params),
76        web_contents_(web_contents),
77        csd_service_(csd_service),
78        database_manager_(database_manager),
79        host_(host) {
80    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
81    DCHECK(web_contents_);
82    DCHECK(csd_service_);
83    DCHECK(database_manager_.get());
84    DCHECK(host_);
85  }
86
87  void Start() {
88    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
89
90    // We start by doing some simple checks that can run on the UI thread.
91    UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
92
93    // Only classify [X]HTML documents.
94    if (params_.contents_mime_type != "text/html" &&
95        params_.contents_mime_type != "application/xhtml+xml") {
96      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
97              << " because it has an unsupported MIME type: "
98              << params_.contents_mime_type;
99      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
100                                NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
101                                NO_CLASSIFY_MAX);
102      return;
103    }
104
105    if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
106      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
107              << " because of hosting on private IP: "
108              << params_.socket_address.host();
109      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
110                                NO_CLASSIFY_PRIVATE_IP,
111                                NO_CLASSIFY_MAX);
112      return;
113    }
114
115    // Don't run the phishing classifier if the tab is incognito.
116    if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
117      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
118              << " because we're browsing incognito.";
119      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
120                                NO_CLASSIFY_OFF_THE_RECORD,
121                                NO_CLASSIFY_MAX);
122
123      return;
124    }
125
126    // We lookup the csd-whitelist before we lookup the cache because
127    // a URL may have recently been whitelisted.  If the URL matches
128    // the csd-whitelist we won't start classification.  The
129    // csd-whitelist check has to be done on the IO thread because it
130    // uses the SafeBrowsing service class.
131    BrowserThread::PostTask(
132        BrowserThread::IO,
133        FROM_HERE,
134        base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,
135                   this, params_.url));
136  }
137
138  void Cancel() {
139    canceled_ = true;
140    // Just to make sure we don't do anything stupid we reset all these
141    // pointers except for the safebrowsing service class which may be
142    // accessed by CheckCsdWhitelist().
143    web_contents_ = NULL;
144    csd_service_ = NULL;
145    host_ = NULL;
146  }
147
148 private:
149  friend class base::RefCountedThreadSafe<
150      ClientSideDetectionHost::ShouldClassifyUrlRequest>;
151
152  // Enum used to keep stats about why the pre-classification check failed.
153  enum PreClassificationCheckFailures {
154    OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
155    NO_CLASSIFY_PRIVATE_IP,
156    NO_CLASSIFY_OFF_THE_RECORD,
157    NO_CLASSIFY_MATCH_CSD_WHITELIST,
158    NO_CLASSIFY_TOO_MANY_REPORTS,
159    NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
160
161    NO_CLASSIFY_MAX  // Always add new values before this one.
162  };
163
164  // The destructor can be called either from the UI or the IO thread.
165  virtual ~ShouldClassifyUrlRequest() { }
166
167  void CheckCsdWhitelist(const GURL& url) {
168    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
169    if (!database_manager_.get() ||
170        database_manager_->MatchCsdWhitelistUrl(url)) {
171      // We're done.  There is no point in going back to the UI thread.
172      VLOG(1) << "Skipping phishing classification for URL: " << url
173              << " because it matches the csd whitelist";
174      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
175                                NO_CLASSIFY_MATCH_CSD_WHITELIST,
176                                NO_CLASSIFY_MAX);
177      return;
178    }
179
180    BrowserThread::PostTask(
181        BrowserThread::UI,
182        FROM_HERE,
183        base::Bind(&ShouldClassifyUrlRequest::CheckCache, this));
184  }
185
186  void CheckCache() {
187    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
188    if (canceled_) {
189      return;
190    }
191
192    // If result is cached, we don't want to run classification again
193    bool is_phishing;
194    if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
195      VLOG(1) << "Satisfying request for " << params_.url << " from cache";
196      UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
197      // Since we are already on the UI thread, this is safe.
198      host_->MaybeShowPhishingWarning(params_.url, is_phishing);
199      return;
200    }
201
202    // We want to limit the number of requests, though we will ignore the
203    // limit for urls in the cache.  We don't want to start classifying
204    // too many pages as phishing, but for those that we already think are
205    // phishing we want to give ourselves a chance to fix false positives.
206    if (csd_service_->IsInCache(params_.url)) {
207      VLOG(1) << "Reporting limit skipped for " << params_.url
208              << " as it was in the cache.";
209      UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
210    } else if (csd_service_->OverPhishingReportLimit()) {
211      VLOG(1) << "Too many report phishing requests sent recently, "
212              << "not running classification for " << params_.url;
213      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
214                                NO_CLASSIFY_TOO_MANY_REPORTS,
215                                NO_CLASSIFY_MAX);
216      return;
217    }
218
219    // Everything checks out, so start classification.
220    // |web_contents_| is safe to call as we will be destructed
221    // before it is.
222    VLOG(1) << "Instruct renderer to start phishing detection for URL: "
223            << params_.url;
224    content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();
225    rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
226        rvh->GetRoutingID(), params_.url));
227  }
228
229  // No need to protect |canceled_| with a lock because it is only read and
230  // written by the UI thread.
231  bool canceled_;
232  content::FrameNavigateParams params_;
233  WebContents* web_contents_;
234  ClientSideDetectionService* csd_service_;
235  // We keep a ref pointer here just to make sure the safe browsing
236  // database manager stays alive long enough.
237  scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
238  ClientSideDetectionHost* host_;
239
240  DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
241};
242
243// static
244ClientSideDetectionHost* ClientSideDetectionHost::Create(
245    WebContents* tab) {
246  return new ClientSideDetectionHost(tab);
247}
248
249ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
250    : content::WebContentsObserver(tab),
251      csd_service_(NULL),
252      weak_factory_(this),
253      unsafe_unique_page_id_(-1),
254      malware_report_enabled_(false) {
255  DCHECK(tab);
256  // Note: csd_service_ and sb_service will be NULL here in testing.
257  csd_service_ = g_browser_process->safe_browsing_detection_service();
258  feature_extractor_.reset(new BrowserFeatureExtractor(tab, csd_service_));
259  registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
260                 content::Source<WebContents>(tab));
261
262  scoped_refptr<SafeBrowsingService> sb_service =
263      g_browser_process->safe_browsing_service();
264  if (sb_service.get()) {
265    ui_manager_ = sb_service->ui_manager();
266    database_manager_ = sb_service->database_manager();
267    ui_manager_->AddObserver(this);
268  }
269
270  // Only enable the malware bad IP matching and report feature for canary
271  // and dev channel.
272  chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
273  malware_report_enabled_ = (
274      channel == chrome::VersionInfo::CHANNEL_DEV ||
275      channel == chrome::VersionInfo::CHANNEL_CANARY);
276}
277
278ClientSideDetectionHost::~ClientSideDetectionHost() {
279  if (ui_manager_.get())
280    ui_manager_->RemoveObserver(this);
281}
282
283bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
284  bool handled = true;
285  IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
286    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
287                        OnPhishingDetectionDone)
288    IPC_MESSAGE_UNHANDLED(handled = false)
289  IPC_END_MESSAGE_MAP()
290  return handled;
291}
292
293void ClientSideDetectionHost::DidNavigateMainFrame(
294    const content::LoadCommittedDetails& details,
295    const content::FrameNavigateParams& params) {
296  // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
297  // that don't call this method on the UI thread.
298  // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
299  if (details.is_in_page) {
300    // If the navigation is within the same page, the user isn't really
301    // navigating away.  We don't need to cancel a pending callback or
302    // begin a new classification.
303    return;
304  }
305  // If we navigate away and there currently is a pending phishing
306  // report request we have to cancel it to make sure we don't display
307  // an interstitial for the wrong page.  Note that this won't cancel
308  // the server ping back but only cancel the showing of the
309  // interstial.
310  weak_factory_.InvalidateWeakPtrs();
311
312  if (!csd_service_) {
313    return;
314  }
315
316  // Cancel any pending classification request.
317  if (classification_request_.get()) {
318    classification_request_->Cancel();
319  }
320  browse_info_.reset(new BrowseInfo);
321
322  // Store redirect chain information.
323  if (params.url.host() != cur_host_) {
324    cur_host_ = params.url.host();
325    cur_host_redirects_ = params.redirects;
326  }
327  browse_info_->host_redirects = cur_host_redirects_;
328  browse_info_->url_redirects = params.redirects;
329  browse_info_->http_status_code = details.http_status_code;
330
331  // Notify the renderer if it should classify this URL.
332  classification_request_ = new ShouldClassifyUrlRequest(
333      params, web_contents(), csd_service_, database_manager_.get(), this);
334  classification_request_->Start();
335}
336
337void ClientSideDetectionHost::OnSafeBrowsingHit(
338    const SafeBrowsingUIManager::UnsafeResource& resource) {
339  // Check that this notification is really for us and that it corresponds to
340  // either a malware or phishing hit.  In this case we store the unique page
341  // ID for later.
342  if (web_contents() &&
343      web_contents()->GetRenderProcessHost()->GetID() ==
344          resource.render_process_host_id &&
345      web_contents()->GetRenderViewHost()->GetRoutingID() ==
346          resource.render_view_id &&
347      (resource.threat_type == SB_THREAT_TYPE_URL_PHISHING ||
348       resource.threat_type == SB_THREAT_TYPE_URL_MALWARE) &&
349      web_contents()->GetController().GetActiveEntry()) {
350    unsafe_unique_page_id_ =
351        web_contents()->GetController().GetActiveEntry()->GetUniqueID();
352    // We also keep the resource around in order to be able to send the
353    // malicious URL to the server.
354    unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
355    unsafe_resource_->callback.Reset();  // Don't do anything stupid.
356  }
357}
358
359void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {
360  DCHECK(tab);
361  // Tell any pending classification request that it is being canceled.
362  if (classification_request_.get()) {
363    classification_request_->Cancel();
364  }
365  // Cancel all pending feature extractions.
366  feature_extractor_.reset();
367}
368
369void ClientSideDetectionHost::OnPhishingDetectionDone(
370    const std::string& verdict_str) {
371  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
372  // There is something seriously wrong if there is no service class but
373  // this method is called.  The renderer should not start phishing detection
374  // if there isn't any service class in the browser.
375  DCHECK(csd_service_);
376  // There shouldn't be any pending requests because we revoke them everytime
377  // we navigate away.
378  DCHECK(!weak_factory_.HasWeakPtrs());
379  DCHECK(browse_info_.get());
380
381  // We parse the protocol buffer here.  If we're unable to parse it we won't
382  // send the verdict further.
383  scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
384  if (csd_service_ &&
385      !weak_factory_.HasWeakPtrs() &&
386      browse_info_.get() &&
387      verdict->ParseFromString(verdict_str) &&
388      verdict->IsInitialized()) {
389    if (malware_report_enabled_) {
390      scoped_ptr<ClientMalwareRequest> malware_verdict(
391          new ClientMalwareRequest);
392      // Start browser-side malware feature extraction.  Once we're done it will
393      // send the malware client verdict request.
394      malware_verdict->set_url(verdict->url());
395      feature_extractor_->ExtractMalwareFeatures(
396          browse_info_.get(),
397          malware_verdict.get());
398      MalwareFeatureExtractionDone(malware_verdict.Pass());
399    }
400
401    // We only send phishing verdict to the server if the verdict is phishing or
402    // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
403    // malware or phishing interstitial was shown but the user clicked
404    // through.
405    if (verdict->is_phishing() || DidShowSBInterstitial()) {
406      if (DidShowSBInterstitial()) {
407        browse_info_->unsafe_resource.reset(unsafe_resource_.release());
408      }
409      // Start browser-side feature extraction.  Once we're done it will send
410      // the client verdict request.
411      feature_extractor_->ExtractFeatures(
412          browse_info_.get(),
413          verdict.release(),
414          base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
415                     weak_factory_.GetWeakPtr()));
416    }
417  }
418  browse_info_.reset();
419}
420
421void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
422                                                       bool is_phishing) {
423  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
424  VLOG(2) << "Received server phishing verdict for URL:" << phishing_url
425          << " is_phishing:" << is_phishing;
426  if (is_phishing) {
427    DCHECK(web_contents());
428    if (ui_manager_.get()) {
429      SafeBrowsingUIManager::UnsafeResource resource;
430      resource.url = phishing_url;
431      resource.original_url = phishing_url;
432      resource.is_subresource = false;
433      resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
434      resource.render_process_host_id =
435          web_contents()->GetRenderProcessHost()->GetID();
436      resource.render_view_id =
437          web_contents()->GetRenderViewHost()->GetRoutingID();
438      if (!ui_manager_->IsWhitelisted(resource)) {
439        // We need to stop any pending navigations, otherwise the interstital
440        // might not get created properly.
441        web_contents()->GetController().DiscardNonCommittedEntries();
442        resource.callback = base::Bind(&EmptyUrlCheckCallback);
443        ui_manager_->DoDisplayBlockingPage(resource);
444      }
445    }
446  }
447}
448
449void ClientSideDetectionHost::FeatureExtractionDone(
450    bool success,
451    ClientPhishingRequest* request) {
452  if (!request) {
453    DLOG(FATAL) << "Invalid request object in FeatureExtractionDone";
454    return;
455  }
456  VLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
457          << request->url() << ". Start sending client phishing request.";
458  ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
459  // If the client-side verdict isn't phishing we don't care about the server
460  // response because we aren't going to display a warning.
461  if (request->is_phishing()) {
462    callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
463                          weak_factory_.GetWeakPtr());
464  }
465  // Send ping even if the browser feature extraction failed.
466  csd_service_->SendClientReportPhishingRequest(
467      request,  // The service takes ownership of the request object.
468      callback);
469}
470
471void ClientSideDetectionHost::MalwareFeatureExtractionDone(
472    scoped_ptr<ClientMalwareRequest> request) {
473  if (!request) {
474    DLOG(FATAL) << "Invalid request object in MalwareFeatureExtractionDone";
475    return;
476  }
477  VLOG(2) << "Malware Feature extraction done for URL: " << request->url()
478          << ", with features count:" << request->feature_map_size();
479
480  // Send ping if there is matching features.
481  if (request->feature_map_size() > 0) {
482    VLOG(1) << "Start sending client malware request.";
483    ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
484    csd_service_->SendClientReportMalwareRequest(
485        request.release(),  // The service takes ownership of the request object
486        callback);  // no action after request sent for now
487  }
488}
489
490void ClientSideDetectionHost::UpdateIPHostMap(const std::string& ip,
491                                              const std::string& host) {
492  if (ip.empty() || host.empty())
493    return;
494
495  IPHostMap::iterator it = browse_info_->ips.find(ip);
496  if (it == browse_info_->ips.end()) {
497    if (int(browse_info_->ips.size()) < kMaxIPsPerBrowse) {
498      std::set<std::string> hosts;
499      hosts.insert(host);
500      browse_info_->ips.insert(make_pair(ip, hosts));
501    }
502  } else if (int(it->second.size()) < kMaxHostsPerIP) {
503    it->second.insert(host);
504  }
505}
506
507void ClientSideDetectionHost::Observe(
508    int type,
509    const content::NotificationSource& source,
510    const content::NotificationDetails& details) {
511  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
512  DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
513  const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
514      details).ptr();
515  if (req && browse_info_.get()) {
516    UpdateIPHostMap(req->socket_address.host() /* ip */,
517                    req->url.host()  /* url host */);
518  }
519}
520
521bool ClientSideDetectionHost::DidShowSBInterstitial() {
522  if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
523    return false;
524  }
525  const NavigationEntry* nav_entry =
526      web_contents()->GetController().GetActiveEntry();
527  return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
528}
529
530void ClientSideDetectionHost::set_client_side_detection_service(
531    ClientSideDetectionService* service) {
532  csd_service_ = service;
533}
534
535void ClientSideDetectionHost::set_safe_browsing_managers(
536    SafeBrowsingUIManager* ui_manager,
537    SafeBrowsingDatabaseManager* database_manager) {
538  if (ui_manager_.get())
539    ui_manager_->RemoveObserver(this);
540
541  ui_manager_ = ui_manager;
542  if (ui_manager)
543    ui_manager_->AddObserver(this);
544
545  database_manager_ = database_manager;
546}
547
548}  // namespace safe_browsing
549