1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Helper class which handles communication with the SafeBrowsing backends for
6// client-side phishing detection.  This class is used to fetch the client-side
7// model and send it to all renderers.  This class is also used to send a ping
8// back to Google to verify if a particular site is really phishing or not.
9//
10// This class is not thread-safe and expects all calls to be made on the UI
11// thread.  We also expect that the calling thread runs a message loop.
12
13#ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
14#define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
15
16#include <map>
17#include <queue>
18#include <set>
19#include <string>
20#include <utility>
21#include <vector>
22
23#include "base/basictypes.h"
24#include "base/callback_forward.h"
25#include "base/gtest_prod_util.h"
26#include "base/memory/linked_ptr.h"
27#include "base/memory/ref_counted.h"
28#include "base/memory/scoped_ptr.h"
29#include "base/memory/weak_ptr.h"
30#include "base/time/time.h"
31#include "content/public/browser/notification_observer.h"
32#include "content/public/browser/notification_registrar.h"
33#include "net/base/net_util.h"
34#include "net/url_request/url_fetcher_delegate.h"
35#include "url/gurl.h"
36
// Forward declarations.  The types below appear in this header only behind
// pointers, references or smart-pointer template arguments.

// NOTE(review): SafeBrowsingService is only mentioned in comments in this
// header; this declaration may be unnecessary here -- confirm before removing.
37class SafeBrowsingService;
38
39namespace base {
// Used as scoped_ptr<base::TimeDelta> in ClientSideDetectionService.
// NOTE(review): "base/time/time.h" is already included above, which presumably
// defines this type in full -- confirm whether the declaration is still needed.
40class TimeDelta;
41}
42
43namespace content {
// Passed by pointer to ClientSideDetectionService::SendModelToProcess().
44class RenderProcessHost;
45}
46
47namespace net {
// URLFetcher is used by pointer (map keys, scoped_ptr, OnURLFetchComplete()).
48class URLFetcher;
// Taken by pointer in Create() and the constructor, held via scoped_refptr.
49class URLRequestContextGetter;
// Passed by const reference to the Handle*() response handlers.
50class URLRequestStatus;
// Passed by const reference to the Handle*() response handlers.
51typedef std::vector<std::string> ResponseCookies;
52}  // namespace net
53
54namespace safe_browsing {
// Types referenced below only by pointer or reference.
// NOTE(review): ClientPhishingResponse is not referenced anywhere in this
// header; presumably it is used by the implementation file -- confirm.
55class ClientMalwareRequest;
56class ClientPhishingRequest;
57class ClientPhishingResponse;
58class ClientSideModel;
59
// Handles communication with the SafeBrowsing backends for client-side
// detection: fetches the client-side model, sends it to renderers, and sends
// phishing / malware report requests whose verdicts are delivered through
// callbacks.  Implements net::URLFetcherDelegate (fetch completion) and
// content::NotificationObserver (Observe()).  As stated in the file comment,
// this class is not thread-safe and expects all calls on the UI thread.
60class ClientSideDetectionService : public net::URLFetcherDelegate,
61                                   public content::NotificationObserver {
62 public:
63  // void(GURL phishing_url, bool is_phishing).
64  typedef base::Callback<void(GURL, bool)> ClientReportPhishingRequestCallback;
65  // void(GURL original_url, GURL malware_url, bool is_malware).
66  typedef base::Callback<void(GURL, GURL, bool)>
67      ClientReportMalwareRequestCallback;
68
69  virtual ~ClientSideDetectionService();
70
71  // Creates a client-side detection service.  The service is initially
72  // disabled, use SetEnabledAndRefreshState() to start it.  The caller takes
73  // ownership of the object.  This function may return NULL.
74  static ClientSideDetectionService* Create(
75      net::URLRequestContextGetter* request_context_getter);
76
77  // Enables or disables the service, and refreshes the state of all renderers.
78  // This is usually called by the SafeBrowsingService, which tracks whether
79  // any profile uses these services at all.  Disabling cancels any pending
80  // requests; existing ClientSideDetectionHosts will have their callbacks
81  // called with "false" verdicts.  Enabling starts downloading the model after
82  // a delay.  In all cases, each render process is updated to match the state
83  // of the SafeBrowsing preference for that profile.
84  void SetEnabledAndRefreshState(bool enabled);
85
  // Returns whether the service is currently enabled (the value of enabled_).
86  bool enabled() const {
87    return enabled_;
88  }
89
90  // From the net::URLFetcherDelegate interface.
91  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
92
93  // content::NotificationObserver overrides:
94  virtual void Observe(int type,
95                       const content::NotificationSource& source,
96                       const content::NotificationDetails& details) OVERRIDE;
97
98  // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
99  // The URL scheme of the |url()| in the request should be HTTP.  This method
100  // takes ownership of |verdict| and calls the
101  // callback once the result has come back from the server or if an error
102  // occurs during the fetch.  If the service is disabled or an error occurs
103  // the phishing verdict will always be false.  The callback is always called
104  // after SendClientReportPhishingRequest() returns and on the same thread as
105  // SendClientReportPhishingRequest() was called.  You may set |callback| to
106  // NULL if you don't care about the server verdict.
  // NOTE(review): |callback| is a const reference to a base::Callback, not a
  // pointer, so "NULL" above presumably means a null (unbound) callback --
  // confirm against the implementation.
107  virtual void SendClientReportPhishingRequest(
108      ClientPhishingRequest* verdict,
109      const ClientReportPhishingRequestCallback& callback);
110
111  // Similar to the method above, but sends a ClientMalwareRequest instead.
112  virtual void SendClientReportMalwareRequest(
113      ClientMalwareRequest* verdict,
114      const ClientReportMalwareRequestCallback& callback);
115
116  // Returns true if the given IP address string falls within a private
117  // (unroutable) network block.  Pages which are hosted on these IP addresses
118  // are exempt from client-side phishing detection.  This is called by the
119  // ClientSideDetectionHost prior to sending the renderer a
120  // SafeBrowsingMsg_StartPhishingDetection IPC.
121  //
122  // ip_address should be a dotted IPv4 address, or an unbracketed IPv6
123  // address.
124  virtual bool IsPrivateIPAddress(const std::string& ip_address) const;
125
126  // Returns true and sets is_phishing if url is in the cache and valid.
127  virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);
128
129  // Returns true if the url is in the cache.
130  virtual bool IsInCache(const GURL& url);
131
132  // Returns true if we have sent more than kMaxReportsPerInterval phishing
133  // reports in the last kReportsIntervalDays days.
134  virtual bool OverPhishingReportLimit();
135
136  // Returns true if we have sent more than kMaxReportsPerInterval malware
137  // reports in the last kReportsIntervalDays days.
138  virtual bool OverMalwareReportLimit();
139
140 protected:
141  // Use Create() method to create an instance of this object.
142  explicit ClientSideDetectionService(
143      net::URLRequestContextGetter* request_context_getter);
144
145  // Enum used to keep stats about why we fail to get the client model.
146  enum ClientModelStatus {
147    MODEL_SUCCESS,
148    MODEL_NOT_CHANGED,
149    MODEL_FETCH_FAILED,
150    MODEL_EMPTY,
151    MODEL_TOO_LARGE,
152    MODEL_PARSE_ERROR,
153    MODEL_MISSING_FIELDS,
154    MODEL_INVALID_VERSION_NUMBER,
155    MODEL_BAD_HASH_IDS,
156    MODEL_STATUS_MAX  // Always add new values before this one.
157  };
158
159  // Starts fetching the model from the network or the cache.  This method
160  // is called periodically to check whether a new client model is available
161  // for download.
162  void StartFetchModel();
163
164  // Schedules the next fetch of the model.
  // NOTE(review): |delay_ms| is presumably the delay before the next fetch, in
  // milliseconds -- confirm against the implementation.
165  virtual void ScheduleFetchModel(int64 delay_ms);  // Virtual for testing.
166
167  // This method is called when we're done fetching the model either because
168  // we hit an error somewhere or because we're actually done fetching and
169  // validating the model.  |status| records the outcome.
170  virtual void EndFetchModel(ClientModelStatus status);  // Virtual for testing.
171
172 private:
  // The test fixture and the individual tests below are granted access to
  // private members.
173  friend class ClientSideDetectionServiceTest;
174  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, FetchModelTest);
175  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetBadSubnets);
176  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
177                           SetEnabledAndRefreshState);
178  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
179  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
180                           ModelHasValidHashIds);
181
182  // CacheState holds all information necessary to respond to a caller without
183  // actually making an HTTP request.
184  struct CacheState {
185    bool is_phishing;
186    base::Time timestamp;
187
188    CacheState(bool phish, base::Time time);
189  };
  // Maps a URL to its cached verdict.
190  typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
191
192  // A tuple of (IP address block, prefix size) representing a private
193  // IP address range.
194  typedef std::pair<net::IPAddressNumber, size_t> AddressRange;
195
196  // Maps an IPv6 subnet mask to a set of hashed IPv6 subnets.  The IPv6
197  // subnets are in network order and hashed with sha256.
198  typedef std::map<std::string /* subnet mask */,
199                   std::set<std::string /* hashed subnet */> > BadSubnetMap;
200
  // Constants used by this class.  They are only declared here; their values
  // are defined out of line and are not visible in this header.
201  static const char kClientReportMalwareUrl[];
202  static const char kClientReportPhishingUrl[];
203  static const char kClientModelUrl[];
204  static const size_t kMaxModelSizeBytes;
205  static const int kMaxReportsPerInterval;
206  static const int kClientModelFetchIntervalMs;
207  static const int kInitialClientModelFetchDelayMs;
208  static const int kReportsIntervalDays;
209  static const int kNegativeCacheIntervalDays;
210  static const int kPositiveCacheIntervalMinutes;
211
212  // Starts sending the request to the client-side detection frontends.
213  // This method takes ownership of |verdict|.
  // (|verdict| is the only pointer argument; |callback| is passed by const
  // reference.)
214  void StartClientReportPhishingRequest(
215      ClientPhishingRequest* verdict,
216      const ClientReportPhishingRequestCallback& callback);
217
  // Malware counterpart of StartClientReportPhishingRequest().
  // NOTE(review): presumably takes ownership of |verdict| in the same way --
  // confirm against the implementation.
218  void StartClientReportMalwareRequest(
219      ClientMalwareRequest* verdict,
220      const ClientReportMalwareRequestCallback& callback);
221
222  // Called by OnURLFetchComplete to handle the response from fetching the
223  // model.
224  void HandleModelResponse(const net::URLFetcher* source,
225                           const GURL& url,
226                           const net::URLRequestStatus& status,
227                           int response_code,
228                           const net::ResponseCookies& cookies,
229                           const std::string& data);
230
231  // Called by OnURLFetchComplete to handle the server response from
232  // sending the client-side phishing request.
233  void HandlePhishingVerdict(const net::URLFetcher* source,
234                             const GURL& url,
235                             const net::URLRequestStatus& status,
236                             int response_code,
237                             const net::ResponseCookies& cookies,
238                             const std::string& data);
239
240  // Called by OnURLFetchComplete to handle the server response from
241  // sending the client-side malware request.
242  void HandleMalwareVerdict(const net::URLFetcher* source,
243                            const GURL& url,
244                            const net::URLRequestStatus& status,
245                            int response_code,
246                            const net::ResponseCookies& cookies,
247                            const std::string& data);
248
249  // Invalidate cache results which are no longer useful.
250  void UpdateCache();
251
252  // Get the number of malware reports sent over the last kReportsIntervalDays.
253  int GetMalwareNumReports();
254
255  // Get the number of phishing reports sent over the last kReportsIntervalDays.
256  int GetPhishingNumReports();
257
258  // Get the number of reports that we have sent over kReportsIntervalDays, and
259  // trims off the old elements of |report_times|.
260  int GetNumReports(std::queue<base::Time>* report_times);
261
262  // Send the model to the given renderer.
263  void SendModelToProcess(content::RenderProcessHost* process);
264
265  // Same as above but sends the model to all renderers.
266  void SendModelToRenderers();
267
268  // Reads the bad subnets from the client model and inserts them into
269  // |bad_subnets| for faster lookups.  This method is static to simplify
270  // testing.
271  static void SetBadSubnets(const ClientSideModel& model,
272                            BadSubnetMap* bad_subnets);
273
274
275  // Returns true iff all the hash id's in the client-side model point to
276  // valid hashes in the model.
277  static bool ModelHasValidHashIds(const ClientSideModel& model);
278
279  // Returns the URL that will be used for phishing requests.
  // NOTE(review): the function takes a generic |report_url| and this class
  // declares both kClientReportPhishingUrl and kClientReportMalwareUrl, so it
  // is presumably used for malware requests as well -- confirm.
280  static GURL GetClientReportUrl(const std::string& report_url);
281
282  // Whether the service is running or not.  When the service is not running,
283  // it won't download the model nor report detected phishing URLs.
284  bool enabled_;
285
  // State for the client-side model and the fetch that retrieves it.
  // NOTE(review): presumably |model_str_| is the serialized form of |model_|
  // and |model_max_age_| controls when the next fetch is scheduled -- confirm
  // against the implementation.
286  std::string model_str_;
287  scoped_ptr<ClientSideModel> model_;
288  scoped_ptr<base::TimeDelta> model_max_age_;
289  scoped_ptr<net::URLFetcher> model_fetcher_;
290
291  // Map of client report phishing request to the corresponding callback that
292  // has to be invoked when the request is done.
  // The mapped values are raw pointers; ClientReportInfo is only declared
  // here, so its contents and ownership are not visible in this header.
293  struct ClientReportInfo;
294  std::map<const net::URLFetcher*, ClientReportInfo*>
295      client_phishing_reports_;
296  // Map of client malware ip request to the corresponding callback that
297  // has to be invoked when the request is done.
298  struct ClientMalwareReportInfo;
299  std::map<const net::URLFetcher*, ClientMalwareReportInfo*>
300      client_malware_reports_;
301
302  // Cache of completed requests. Used to satisfy requests for the same urls
303  // as long as the next request falls within our caching window (which is
304  // determined by kNegativeCacheIntervalDays and
305  // kPositiveCacheIntervalMinutes). The size of this cache is limited by
306  // kMaxReportsPerDay * ceil(InDays(max(negative, positive cache interval))).
  // NOTE(review): no kMaxReportsPerDay is declared in this class;
  // kMaxReportsPerInterval is presumably what is meant -- confirm.
307  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
308  PhishingCache cache_;
309
310  // Timestamp of when we sent a phishing request. Used to limit the number
311  // of phishing requests that we send in a day.
312  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
313  std::queue<base::Time> phishing_report_times_;
314
315  // Timestamp of when we sent a malware request. Used to limit the number
316  // of malware requests that we send in a day.
317  std::queue<base::Time> malware_report_times_;
318
319  // The context we use to issue network requests.
320  scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
321
322  // Map of bad subnets which are copied from the client model and put into
323  // this map to speed up lookups.
324  BadSubnetMap bad_subnets_;
325
  // NOTE(review): presumably holds the notification registrations whose
  // events are delivered to Observe() -- confirm against the implementation.
326  content::NotificationRegistrar registrar_;
327
328  // Used to asynchronously call the callbacks for
329  // SendClientReportPhishingRequest.
  // Declared after every other data member, so it is destroyed before them.
330  base::WeakPtrFactory<ClientSideDetectionService> weak_factory_;
331
332  DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
333};
334}  // namespace safe_browsing
335
336#endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
337