client_side_detection_service.h revision ddb351dbec246cf1fab5ec20d2d5520909041de1
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Helper class which handles communication with the SafeBrowsing backends for
6// client-side phishing detection.  This class can be used to get a file
7// descriptor to the client-side phishing model and also to send a ping back to
8// Google to verify if a particular site is really phishing or not.
9//
10// This class is not thread-safe and expects all calls to GetModelFile() and
11// SendClientReportPhishingRequest() to be made on the UI thread.  We also
12// expect that the calling thread runs a message loop and that there is a FILE
13// thread running to execute asynchronous file operations.
14
15#ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
16#define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
17#pragma once
18
19#include <map>
20#include <queue>
21#include <string>
22#include <utility>
23#include <vector>
24
25#include "base/basictypes.h"
26#include "base/callback.h"
27#include "base/file_path.h"
28#include "base/gtest_prod_util.h"
29#include "base/memory/linked_ptr.h"
30#include "base/memory/ref_counted.h"
31#include "base/memory/scoped_callback_factory.h"
32#include "base/memory/scoped_ptr.h"
33#include "base/platform_file.h"
34#include "base/task.h"
35#include "base/time.h"
36#include "chrome/common/net/url_fetcher.h"
37#include "googleurl/src/gurl.h"
38#include "net/base/net_util.h"
39
// Forward declarations of the net types that this header refers to by name.
40namespace net {
41class URLRequestContextGetter;
42class URLRequestStatus;
43}  // namespace net
44
45namespace safe_browsing {
46class ClientPhishingRequest;
47
// Handles communication with the SafeBrowsing backends for client-side
// phishing detection: GetModelFile() hands out the phishing model that is
// stored on disk, and SendClientReportPhishingRequest() asks the SafeBrowsing
// servers for a phishing verdict.  Fetch completions are delivered through
// the URLFetcher::Delegate interface.
48class ClientSideDetectionService : public URLFetcher::Delegate {
49 public:
  // Callback type for GetModelFile().  It receives the model's platform file,
  // or kInvalidPlatformFileValue if there was an error.
50  typedef Callback1<base::PlatformFile>::Type OpenModelDoneCallback;
51
  // Callback type for SendClientReportPhishingRequest().  It receives the
  // phishing URL and the phishing verdict for that URL.
52  typedef Callback2<GURL /* phishing URL */, bool /* is phishing */>::Type
53      ClientReportPhishingRequestCallback;
54
55  virtual ~ClientSideDetectionService();
56
57  // Creates a client-side detection service and starts fetching the client-side
58  // detection model if necessary.  The model will be stored in |model_path|.
59  // The caller takes ownership of the object.  This function may return NULL.
60  static ClientSideDetectionService* Create(
61      const FilePath& model_path,
62      net::URLRequestContextGetter* request_context_getter);
63
64  // From the URLFetcher::Delegate interface.  The response is handled by
  // HandleModelResponse() or HandlePhishingVerdict().
65  virtual void OnURLFetchComplete(const URLFetcher* source,
66                                  const GURL& url,
67                                  const net::URLRequestStatus& status,
68                                  int response_code,
69                                  const ResponseCookies& cookies,
70                                  const std::string& data);
71
72  // Gets the model file descriptor once the model is ready and stored
73  // on disk.  If there was an error the callback is called and the
74  // platform file is set to kInvalidPlatformFileValue. The
75  // ClientSideDetectionService takes ownership of the |callback|.
76  // The callback is always called after GetModelFile() returns and on the
77  // same thread as GetModelFile() was called.
78  void GetModelFile(OpenModelDoneCallback* callback);
79
80  // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
81  // The URL scheme of the |url()| in the request should be HTTP.  This method
82  // takes ownership of the |verdict| as well as the |callback| and calls the
83  // callback once the result has come back from the server or if an error
84  // occurs during the fetch.  If an error occurs the phishing verdict will
85  // always be false.  The callback is always called after
86  // SendClientReportPhishingRequest() returns and on the same thread as
87  // SendClientReportPhishingRequest() was called.
88  virtual void SendClientReportPhishingRequest(
89      ClientPhishingRequest* verdict,
90      ClientReportPhishingRequestCallback* callback);
91
92  // Returns true if the given IP address string falls within a private
93  // (unroutable) network block.  Pages which are hosted on these IP addresses
94  // are exempt from client-side phishing detection.  This is called by the
95  // ClientSideDetectionHost prior to sending the renderer a
96  // SafeBrowsingMsg_StartPhishingDetection IPC.
97  //
98  // |ip_address| should be a dotted IPv4 address, or an unbracketed IPv6
99  // address.
100  virtual bool IsPrivateIPAddress(const std::string& ip_address) const;
101
102  // Returns true and sets |is_phishing| if |url| is in the cache and valid.
103  virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);
104
105  // Returns true if |url| is in the cache.
106  virtual bool IsInCache(const GURL& url);
107
108  // Returns true if we have sent more than kMaxReportsPerInterval reports in
109  // the last kReportsInterval.
110  virtual bool OverReportLimit();
111
112 protected:
113  // Use Create() method to create an instance of this object.
114  ClientSideDetectionService(
115      const FilePath& model_path,
116      net::URLRequestContextGetter* request_context_getter);
117
118 private:
  // Gives ClientSideDetectionServiceTest access to the private members.
119  friend class ClientSideDetectionServiceTest;
120
  // State of the client-side phishing model.
121  enum ModelStatus {
122    // It's unclear whether or not the model was already fetched.
123    UNKNOWN_STATUS,
124    // Model is fetched and is stored on disk.
125    READY_STATUS,
126    // Error occurred during fetching or writing.
127    ERROR_STATUS,
128  };
129
130  // CacheState holds all information necessary to respond to a caller without
131  // actually making a HTTP request.
132  struct CacheState {
    // The cached phishing verdict.
133    bool is_phishing;
    // NOTE(review): presumably the time at which the verdict was cached, used
    // to decide whether the entry is still valid -- confirm in the .cc file.
134    base::Time timestamp;
135
136    CacheState(bool phish, base::Time time);
137  };
  // Maps a URL to its cached state.
138  typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
139
140  // A tuple of (IP address block, prefix size) representing a private
141  // IP address range.
142  typedef std::pair<net::IPAddressNumber, size_t> AddressRange;
143
  // Constants for the server URLs, the report rate limit (see
  // OverReportLimit()) and the caching window (see |cache_|).  They are only
  // declared here; their values are defined outside this header.
144  static const char kClientReportPhishingUrl[];
145  static const char kClientModelUrl[];
146  static const int kMaxReportsPerInterval;
147  static const base::TimeDelta kReportsInterval;
148  static const base::TimeDelta kNegativeCacheInterval;
149  static const base::TimeDelta kPositiveCacheInterval;
150
151  // Sets the model status and invokes all the pending callbacks in
152  // |open_callbacks_| with the current |model_file_| as parameter.
153  void SetModelStatus(ModelStatus status);
154
155  // Called once the initial open() of the model file is done.  If the file
156  // exists we're done and we can call all the pending callbacks.  If the
157  // file doesn't exist this method will asynchronously fetch the model
158  // from the server by invoking StartFetchingModel().
159  void OpenModelFileDone(base::PlatformFileError error_code,
160                         base::PassPlatformFile file,
161                         bool created);
162
163  // Callback that is invoked once the attempt to create the model
164  // file on disk is done.  If the file was created successfully we
165  // start writing the model to disk (asynchronously).  Otherwise, we
166  // give up and send an invalid platform file to all the pending callbacks.
167  void CreateModelFileDone(base::PlatformFileError error_code,
168                           base::PassPlatformFile file,
169                           bool created);
170
171  // Callback is invoked once we're done writing the model file to disk.
172  // If everything went well then |model_file_| is a valid platform file
173  // that can be sent to all the pending callbacks.  If an error occurs
174  // we give up and send an invalid platform file to all the pending callbacks.
175  void WriteModelFileDone(base::PlatformFileError error_code,
176                          int bytes_written);
177
178  // Helper function which closes the |model_file_| if necessary.
179  void CloseModelFile();
180
181  // Starts sending the request to the client-side detection frontends.
182  // This method takes ownership of both pointers.
183  void StartClientReportPhishingRequest(
184      ClientPhishingRequest* verdict,
185      ClientReportPhishingRequestCallback* callback);
186
187  // Starts getting the model file.
188  void StartGetModelFile(OpenModelDoneCallback* callback);
189
190  // Called by OnURLFetchComplete to handle the response from fetching the
191  // model.
192  void HandleModelResponse(const URLFetcher* source,
193                           const GURL& url,
194                           const net::URLRequestStatus& status,
195                           int response_code,
196                           const ResponseCookies& cookies,
197                           const std::string& data);
198
199  // Called by OnURLFetchComplete to handle the server response from
200  // sending the client-side phishing request.
201  void HandlePhishingVerdict(const URLFetcher* source,
202                             const GURL& url,
203                             const net::URLRequestStatus& status,
204                             int response_code,
205                             const ResponseCookies& cookies,
206                             const std::string& data);
207
208  // Invalidate cache results which are no longer useful.
209  void UpdateCache();
210
211  // Get the number of phishing reports that we have sent over kReportsInterval.
212  int GetNumReports();
213
214  // Initializes the |private_networks_| vector with the network blocks
215  // that we consider non-public IP addresses.  Returns true on success.
216  bool InitializePrivateNetworks();
217
  // Where the model is stored on disk.
  // NOTE(review): presumably initialized from the |model_path| constructor
  // argument -- confirm in the .cc file.
218  FilePath model_path_;
  // Current status of the model; set through SetModelStatus().
219  ModelStatus model_status_;
  // Platform file of the model.  Passed to the pending GetModelFile()
  // callbacks by SetModelStatus() and closed by CloseModelFile().
220  base::PlatformFile model_file_;
  // NOTE(review): presumably the fetcher used to download the model from the
  // server -- confirm in the .cc file.
221  scoped_ptr<URLFetcher> model_fetcher_;
  // NOTE(review): presumably holds the fetched model data while it is being
  // written to disk -- confirm in the .cc file.
222  scoped_ptr<std::string> tmp_model_string_;
  // Pending GetModelFile() callbacks, invoked by SetModelStatus().  This
  // object owns them (GetModelFile() takes ownership of its |callback|).
223  std::vector<OpenModelDoneCallback*> open_callbacks_;
224
225  // Map of client report phishing request to the corresponding callback that
226  // has to be invoked when the request is done.  ClientReportInfo is only
  // forward-declared here; its definition is outside this header.
227  struct ClientReportInfo;
228  std::map<const URLFetcher*, ClientReportInfo*> client_phishing_reports_;
229
230  // Cache of completed requests. Used to satisfy requests for the same urls
231  // as long as the next request falls within our caching window (which is
232  // determined by kNegativeCacheInterval and kPositiveCacheInterval). The
233  // size of this cache is limited by kMaxReportsPerDay *
234  // ceil(InDays(max(kNegativeCacheInterval, kPositiveCacheInterval))).
  // NOTE(review): kMaxReportsPerDay is not declared in this header; this
  // presumably refers to kMaxReportsPerInterval -- confirm.
235  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
236  PhishingCache cache_;
237
238  // Timestamps of the phishing requests that we have sent. Used to limit the
239  // number of phishing requests that we send in a day.
240  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
241  std::queue<base::Time> phishing_report_times_;
242
243  // Used to asynchronously call the callbacks for GetModelFile and
244  // SendClientReportPhishingRequest.
245  ScopedRunnableMethodFactory<ClientSideDetectionService> method_factory_;
246
247  // The client-side detection service object (this) might go away before some
248  // of the callbacks are done (e.g., asynchronous file operations).  The
249  // callback factory will revoke all pending callbacks if this goes away to
250  // avoid a crash.
251  base::ScopedCallbackFactory<ClientSideDetectionService> callback_factory_;
252
253  // The context we use to issue network requests.
254  scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
255
256  // The network blocks that we consider private IP address ranges.  Filled in
  // by InitializePrivateNetworks().
257  std::vector<AddressRange> private_networks_;
258
259  DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
260};
261
262}  // namespace safe_browsing
263
264#endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
265