1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/client_side_detection_host.h"
6
7#include <vector>
8
9#include "base/command_line.h"
10#include "base/logging.h"
11#include "base/memory/ref_counted.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/metrics/histogram.h"
14#include "base/task.h"
15#include "chrome/browser/browser_process.h"
16#include "chrome/browser/profiles/profile.h"
17#include "chrome/browser/safe_browsing/client_side_detection_service.h"
18#include "chrome/browser/safe_browsing/safe_browsing_service.h"
19#include "chrome/common/chrome_switches.h"
20#include "chrome/common/safe_browsing/csd.pb.h"
21#include "chrome/common/safe_browsing/safebrowsing_messages.h"
22#include "content/browser/browser_thread.h"
23#include "content/browser/renderer_host/render_process_host.h"
24#include "content/browser/renderer_host/render_view_host.h"
25#include "content/browser/renderer_host/resource_dispatcher_host.h"
26#include "content/browser/tab_contents/navigation_controller.h"
27#include "content/browser/tab_contents/tab_contents.h"
28#include "content/common/notification_service.h"
29#include "content/common/notification_type.h"
30#include "content/common/view_messages.h"
31#include "googleurl/src/gurl.h"
32
33namespace safe_browsing {
34
35// This class is instantiated each time a new toplevel URL loads, and
36// asynchronously checks whether the phishing classifier should run for this
37// URL.  If so, it notifies the renderer with a StartPhishingDetection IPC.
38// Objects of this class are ref-counted and will be destroyed once nobody
39// uses it anymore.  If |tab_contents|, |csd_service| or |host| go away you need
40// to call Cancel().  We keep the |sb_service| alive in a ref pointer for as
41// long as it takes.
42class ClientSideDetectionHost::ShouldClassifyUrlRequest
43    : public base::RefCountedThreadSafe<
44          ClientSideDetectionHost::ShouldClassifyUrlRequest> {
45 public:
46  ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params,
47                           TabContents* tab_contents,
48                           ClientSideDetectionService* csd_service,
49                           SafeBrowsingService* sb_service,
50                           ClientSideDetectionHost* host)
51      : canceled_(false),
52        params_(params),
53        tab_contents_(tab_contents),
54        csd_service_(csd_service),
55        sb_service_(sb_service),
56        host_(host) {
57    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
58    DCHECK(tab_contents_);
59    DCHECK(csd_service_);
60    DCHECK(sb_service_);
61    DCHECK(host_);
62  }
63
64  void Start() {
65    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
66
67    // We start by doing some simple checks that can run on the UI thread.
68    UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
69
70    // Only classify [X]HTML documents.
71    if (params_.contents_mime_type != "text/html" &&
72        params_.contents_mime_type != "application/xhtml+xml") {
73      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
74              << " because it has an unsupported MIME type: "
75              << params_.contents_mime_type;
76      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
77                                NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
78                                NO_CLASSIFY_MAX);
79      return;
80    }
81
82    // Don't run the phishing classifier if the URL came from a private
83    // network, since we don't want to ping back in this case.  We also need
84    // to check whether the connection was proxied -- if so, we won't have the
85    // correct remote IP address, and will skip phishing classification.
86    if (params_.was_fetched_via_proxy) {
87      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
88              << " because it was fetched via a proxy.";
89      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
90                                NO_CLASSIFY_PROXY_FETCH,
91                                NO_CLASSIFY_MAX);
92      return;
93    }
94    if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
95      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
96              << " because of hosting on private IP: "
97              << params_.socket_address.host();
98      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
99                                NO_CLASSIFY_PRIVATE_IP,
100                                NO_CLASSIFY_MAX);
101      return;
102    }
103
104    // Don't run the phishing classifier if the tab is incognito.
105    if (tab_contents_->profile()->IsOffTheRecord()) {
106      VLOG(1) << "Skipping phishing classification for URL: " << params_.url
107              << " because we're browsing incognito.";
108      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
109                                NO_CLASSIFY_OFF_THE_RECORD,
110                                NO_CLASSIFY_MAX);
111
112      return;
113    }
114
115    // We lookup the csd-whitelist before we lookup the cache because
116    // a URL may have recently been whitelisted.  If the URL matches
117    // the csd-whitelist we won't start classification.  The
118    // csd-whitelist check has to be done on the IO thread because it
119    // uses the SafeBrowsing service class.
120    BrowserThread::PostTask(
121        BrowserThread::IO,
122        FROM_HERE,
123        NewRunnableMethod(this,
124                          &ShouldClassifyUrlRequest::CheckCsdWhitelist,
125                          params_.url));
126  }
127
128  void Cancel() {
129    canceled_ = true;
130    // Just to make sure we don't do anything stupid we reset all these
131    // pointers except for the safebrowsing service class which may be
132    // accessed by CheckCsdWhitelist().
133    tab_contents_ = NULL;
134    csd_service_ = NULL;
135    host_ = NULL;
136  }
137
138 private:
139  friend class base::RefCountedThreadSafe<
140      ClientSideDetectionHost::ShouldClassifyUrlRequest>;
141
142  // Enum used to keep stats about why the pre-classification check failed.
143  enum PreClassificationCheckFailures {
144    NO_CLASSIFY_PROXY_FETCH,
145    NO_CLASSIFY_PRIVATE_IP,
146    NO_CLASSIFY_OFF_THE_RECORD,
147    NO_CLASSIFY_MATCH_CSD_WHITELIST,
148    NO_CLASSIFY_TOO_MANY_REPORTS,
149    NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
150
151    NO_CLASSIFY_MAX  // Always add new values before this one.
152  };
153
154  // The destructor can be called either from the UI or the IO thread.
155  virtual ~ShouldClassifyUrlRequest() { }
156
157  void CheckCsdWhitelist(const GURL& url) {
158    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
159    if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) {
160      // We're done.  There is no point in going back to the UI thread.
161      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
162                                NO_CLASSIFY_MATCH_CSD_WHITELIST,
163                                NO_CLASSIFY_MAX);
164      return;
165    }
166
167    BrowserThread::PostTask(
168        BrowserThread::UI,
169        FROM_HERE,
170        NewRunnableMethod(this,
171                          &ShouldClassifyUrlRequest::CheckCache));
172  }
173
174  void CheckCache() {
175    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
176    if (canceled_) {
177      return;
178    }
179
180    // If result is cached, we don't want to run classification again
181    bool is_phishing;
182    if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
183      VLOG(1) << "Satisfying request for " << params_.url << " from cache";
184      UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
185      // Since we are already on the UI thread, this is safe.
186      host_->MaybeShowPhishingWarning(params_.url, is_phishing);
187      return;
188    }
189
190    // We want to limit the number of requests, though we will ignore the
191    // limit for urls in the cache.  We don't want to start classifying
192    // too many pages as phishing, but for those that we already think are
193    // phishing we want to give ourselves a chance to fix false positives.
194    if (csd_service_->IsInCache(params_.url)) {
195      VLOG(1) << "Reporting limit skipped for " << params_.url
196              << " as it was in the cache.";
197      UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
198    } else if (csd_service_->OverReportLimit()) {
199      VLOG(1) << "Too many report phishing requests sent recently, "
200              << "not running classification for " << params_.url;
201      UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
202                                NO_CLASSIFY_TOO_MANY_REPORTS,
203                                NO_CLASSIFY_MAX);
204      return;
205    }
206
207    // Everything checks out, so start classification.
208    // |tab_contents_| is safe to call as we will be destructed
209    // before it is.
210    RenderViewHost* rvh = tab_contents_->render_view_host();
211    rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
212        rvh->routing_id(), params_.url));
213  }
214
215  // No need to protect |canceled_| with a lock because it is only read and
216  // written by the UI thread.
217  bool canceled_;
218  ViewHostMsg_FrameNavigate_Params params_;
219  TabContents* tab_contents_;
220  ClientSideDetectionService* csd_service_;
221  // We keep a ref pointer here just to make sure the service class stays alive
222  // long enough.
223  scoped_refptr<SafeBrowsingService> sb_service_;
224  ClientSideDetectionHost* host_;
225
226  DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
227};
228
229// This class is used to display the phishing interstitial.
230class CsdClient : public SafeBrowsingService::Client {
231 public:
232  CsdClient() {}
233
234  // Method from SafeBrowsingService::Client.  This method is called on the
235  // IO thread once the interstitial is going away.  This method simply deletes
236  // the CsdClient object.
237  virtual void OnBlockingPageComplete(bool proceed) {
238    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
239    // Delete this on the UI thread since it was created there.
240    BrowserThread::PostTask(BrowserThread::UI,
241                            FROM_HERE,
242                            new DeleteTask<CsdClient>(this));
243  }
244
245 private:
246  friend class DeleteTask<CsdClient>;  // Calls the private destructor.
247
248  // We're taking care of deleting this object.  No-one else should delete
249  // this object.
250  virtual ~CsdClient() {}
251
252  DISALLOW_COPY_AND_ASSIGN(CsdClient);
253};
254
255ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab)
256    : TabContentsObserver(tab),
257      csd_service_(g_browser_process->safe_browsing_detection_service()),
258      cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) {
259  DCHECK(tab);
260  // Note: csd_service_ and sb_service_ might be NULL.
261  ResourceDispatcherHost* resource =
262      g_browser_process->resource_dispatcher_host();
263  if (resource) {
264    sb_service_ = resource->safe_browsing_service();
265  }
266}
267
268ClientSideDetectionHost::~ClientSideDetectionHost() {
269  // Tell any pending classification request that it is being canceled.
270  if (classification_request_.get()) {
271    classification_request_->Cancel();
272  }
273}
274
275bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
276  bool handled = true;
277  IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
278    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_DetectedPhishingSite,
279                        OnDetectedPhishingSite)
280    IPC_MESSAGE_UNHANDLED(handled = false)
281  IPC_END_MESSAGE_MAP()
282  return handled;
283}
284
285void ClientSideDetectionHost::DidNavigateMainFramePostCommit(
286    const NavigationController::LoadCommittedDetails& details,
287    const ViewHostMsg_FrameNavigate_Params& params) {
288  // TODO(noelutz): move this DCHECK to TabContents and fix all the unit tests
289  // that don't call this method on the UI thread.
290  // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
291
292  if (details.is_in_page) {
293    // If the navigation is within the same page, the user isn't really
294    // navigating away.  We don't need to cancel a pending callback or
295    // begin a new classification.
296    return;
297  }
298
299  // If we navigate away and there currently is a pending phishing
300  // report request we have to cancel it to make sure we don't display
301  // an interstitial for the wrong page.  Note that this won't cancel
302  // the server ping back but only cancel the showing of the
303  // interstial.
304  cb_factory_.RevokeAll();
305
306  if (csd_service_) {
307    // Cancel any pending classification request.
308    if (classification_request_.get()) {
309      classification_request_->Cancel();
310    }
311
312    // Notify the renderer if it should classify this URL.
313    classification_request_ = new ShouldClassifyUrlRequest(params,
314                                                           tab_contents(),
315                                                           csd_service_,
316                                                           sb_service_,
317                                                           this);
318    classification_request_->Start();
319  }
320}
321
322void ClientSideDetectionHost::OnDetectedPhishingSite(
323    const std::string& verdict_str) {
324  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
325  // There is something seriously wrong if there is no service class but
326  // this method is called.  The renderer should not start phishing detection
327  // if there isn't any service class in the browser.
328  DCHECK(csd_service_);
329  // We parse the protocol buffer here.  If we're unable to parse it we won't
330  // send the verdict further.
331  scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
332  if (csd_service_ &&
333      verdict->ParseFromString(verdict_str) &&
334      verdict->IsInitialized()) {
335    // There shouldn't be any pending requests because we revoke them everytime
336    // we navigate away.
337    DCHECK(!cb_factory_.HasPendingCallbacks());
338    csd_service_->SendClientReportPhishingRequest(
339        verdict.release(),  // The service takes ownership of the verdict.
340        cb_factory_.NewCallback(
341            &ClientSideDetectionHost::MaybeShowPhishingWarning));
342  }
343}
344
345void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
346                                                       bool is_phishing) {
347  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
348  if (is_phishing &&
349      CommandLine::ForCurrentProcess()->HasSwitch(
350          switches::kEnableClientSidePhishingInterstitial)) {
351    DCHECK(tab_contents());
352    // TODO(noelutz): this is not perfect.  It's still possible that the
353    // user browses away before the interstitial is shown.  Maybe we should
354    // stop all pending navigations?
355    if (sb_service_) {
356      // TODO(noelutz): refactor the SafeBrowsing service class and the
357      // SafeBrowsing blocking page class so that we don't need to depend
358      // on the SafeBrowsingService here and so that we don't need to go
359      // through the IO message loop.
360      std::vector<GURL> redirect_urls;
361      BrowserThread::PostTask(
362          BrowserThread::IO,
363          FROM_HERE,
364          NewRunnableMethod(sb_service_.get(),
365                            &SafeBrowsingService::DisplayBlockingPage,
366                            phishing_url, phishing_url,
367                            redirect_urls,
368                            // We only classify the main frame URL.
369                            ResourceType::MAIN_FRAME,
370                            // TODO(noelutz): create a separate threat type
371                            // for client-side phishing detection.
372                            SafeBrowsingService::URL_PHISHING,
373                            new CsdClient() /* will delete itself */,
374                            tab_contents()->GetRenderProcessHost()->id(),
375                            tab_contents()->render_view_host()->routing_id()));
376    }
377  }
378}
379
// Replaces the client-side detection service pointer used by this host with
// |service|.  The pointer is stored as-is; a classification request that was
// already created keeps the pointer it was constructed with.
void ClientSideDetectionHost::set_client_side_detection_service(
    ClientSideDetectionService* service) {
  csd_service_ = service;
}
384
// Replaces the SafeBrowsing service used by this host with |service|.  A
// classification request that was already created keeps the service it was
// constructed with.
void ClientSideDetectionHost::set_safe_browsing_service(
    SafeBrowsingService* service) {
  sb_service_ = service;
}
389
390}  // namespace safe_browsing
391