1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
6#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
7
8// A class that implements Chrome's interface with the SafeBrowsing protocol.
9// See https://developers.google.com/safe-browsing/developers_guide_v2 for
10// protocol details.
11//
// The SafeBrowsingProtocolManager handles formatting and making requests of,
// and handling responses from, Google's SafeBrowsing servers. This class uses
// the SafeBrowsingProtocolParser class to do the actual parsing.
15
16#include <deque>
17#include <set>
18#include <string>
19#include <vector>
20
21#include "base/containers/hash_tables.h"
22#include "base/gtest_prod_util.h"
23#include "base/memory/scoped_ptr.h"
24#include "base/threading/non_thread_safe.h"
25#include "base/time/time.h"
26#include "base/timer/timer.h"
27#include "chrome/browser/safe_browsing/chunk_range.h"
28#include "chrome/browser/safe_browsing/protocol_manager_helper.h"
29#include "chrome/browser/safe_browsing/protocol_parser.h"
30#include "chrome/browser/safe_browsing/safe_browsing_util.h"
31#include "net/url_request/url_fetcher_delegate.h"
32#include "url/gurl.h"
33
34namespace net {
35class URLFetcher;
36class URLRequestContextGetter;
37}  // namespace net
38
#if defined(COMPILER_GCC)
// With gcc, a hash_map keyed on URLFetcher pointers needs an explicit hasher
// specialization; MSVC works without one.
namespace BASE_HASH_NAMESPACE {
template<>
struct hash<const net::URLFetcher*> {
  // Uses the pointer's address value as the hash.
  size_t operator()(const net::URLFetcher* key) const {
    const size_t address = reinterpret_cast<size_t>(key);
    return address;
  }
};
}  // namespace BASE_HASH_NAMESPACE
#endif  // defined(COMPILER_GCC)
51
52class SBProtocolManagerFactory;
53class SafeBrowsingProtocolManagerDelegate;
54
55class SafeBrowsingProtocolManager : public net::URLFetcherDelegate,
56                                    public base::NonThreadSafe {
57 public:
58  // FullHashCallback is invoked when GetFullHash completes.
59  // Parameters:
60  //   - The vector of full hash results. If empty, indicates that there
61  //     were no matches, and that the resource is safe.
62  //   - Whether the result can be cached. This may not be the case when
63  //     the result did not come from the SB server, for example.
64  typedef base::Callback<void(const std::vector<SBFullHashResult>&,
65                              bool)> FullHashCallback;
66
67  virtual ~SafeBrowsingProtocolManager();
68
69  // Makes the passed |factory| the factory used to instantiate
70  // a SafeBrowsingService. Useful for tests.
71  static void RegisterFactory(SBProtocolManagerFactory* factory) {
72    factory_ = factory;
73  }
74
75  // Create an instance of the safe browsing protocol manager.
76  static SafeBrowsingProtocolManager* Create(
77      SafeBrowsingProtocolManagerDelegate* delegate,
78      net::URLRequestContextGetter* request_context_getter,
79      const SafeBrowsingProtocolConfig& config);
80
81  // Sets up the update schedule and internal state for making periodic requests
82  // of the Safebrowsing servers.
83  virtual void Initialize();
84
85  // net::URLFetcherDelegate interface.
86  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
87
88  // Retrieve the full hash for a set of prefixes, and invoke the callback
89  // argument when the results are retrieved. The callback may be invoked
90  // synchronously.
91  virtual void GetFullHash(const std::vector<SBPrefix>& prefixes,
92                           FullHashCallback callback,
93                           bool is_download);
94
95  // Forces the start of next update after |interval| time.
96  void ForceScheduleNextUpdate(base::TimeDelta interval);
97
98  // Scheduled update callback.
99  void GetNextUpdate();
100
101  // Called by the SafeBrowsingService when our request for a list of all chunks
102  // for each list is done.  If database_error is true, that means the protocol
103  // manager shouldn't fetch updates since they can't be written to disk.  It
104  // should try again later to open the database.
105  void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list,
106                           bool database_error);
107
108  // The last time we received an update.
109  base::Time last_update() const { return last_update_; }
110
111  // Setter for additional_query_. To make sure the additional_query_ won't
112  // be changed in the middle of an update, caller (e.g.: SafeBrowsingService)
113  // should call this after callbacks triggered in UpdateFinished() or before
114  // IssueUpdateRequest().
115  void set_additional_query(const std::string& query) {
116    additional_query_ = query;
117  }
118  const std::string& additional_query() const {
119    return additional_query_;
120  }
121
122  // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
123  // ORDERING OF THESE VALUES.
124  enum ResultType {
125    // 200 response code means that the server recognized the hash
126    // prefix, while 204 is an empty response indicating that the
127    // server did not recognize it.
128    GET_HASH_STATUS_200,
129    GET_HASH_STATUS_204,
130
131    // Subset of successful responses which returned no full hashes.
132    // This includes the 204 case, and also 200 responses for stale
133    // prefixes (deleted at the server but yet deleted on the client).
134    GET_HASH_FULL_HASH_EMPTY,
135
136    // Subset of successful responses for which one or more of the
137    // full hashes matched (should lead to an interstitial).
138    GET_HASH_FULL_HASH_HIT,
139
140    // Subset of successful responses which weren't empty and have no
141    // matches.  It means that there was a prefix collision which was
142    // cleared up by the full hashes.
143    GET_HASH_FULL_HASH_MISS,
144
145    // Memory space for histograms is determined by the max.  ALWAYS
146    // ADD NEW VALUES BEFORE THIS ONE.
147    GET_HASH_RESULT_MAX
148  };
149
150  // Record a GetHash result. |is_download| indicates if the get
151  // hash is triggered by download related lookup.
152  static void RecordGetHashResult(bool is_download,
153                                  ResultType result_type);
154
155  // Returns whether another update is currently scheduled.
156  bool IsUpdateScheduled() const;
157
158 protected:
159  // Constructs a SafeBrowsingProtocolManager for |delegate| that issues
160  // network requests using |request_context_getter|.
161  SafeBrowsingProtocolManager(
162      SafeBrowsingProtocolManagerDelegate* delegate,
163      net::URLRequestContextGetter* request_context_getter,
164      const SafeBrowsingProtocolConfig& config);
165
166 private:
167  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestBackOffTimes);
168  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestChunkStrings);
169  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestGetHashUrl);
170  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
171                           TestGetHashBackOffTimes);
172  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestNextChunkUrl);
173  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestUpdateUrl);
174  friend class SafeBrowsingServerTest;
175  friend class SBProtocolManagerFactoryImpl;
176
177  // Internal API for fetching information from the SafeBrowsing servers. The
178  // GetHash requests are higher priority since they can block user requests
179  // so are handled separately.
180  enum SafeBrowsingRequestType {
181    NO_REQUEST = 0,     // No requests in progress
182    UPDATE_REQUEST,     // Request for redirect URLs
183    BACKUP_UPDATE_REQUEST, // Request for redirect URLs to a backup URL.
184    CHUNK_REQUEST,      // Request for a specific chunk
185  };
186
187  // Which type of backup update request is being used.
188  enum BackupUpdateReason {
189    BACKUP_UPDATE_REASON_CONNECT,
190    BACKUP_UPDATE_REASON_HTTP,
191    BACKUP_UPDATE_REASON_NETWORK,
192    BACKUP_UPDATE_REASON_MAX,
193  };
194
195  // Generates Update URL for querying about the latest set of chunk updates.
196  GURL UpdateUrl() const;
197
198  // Generates backup Update URL for querying about the latest set of chunk
199  // updates. |url_prefix| is the base prefix to use.
200  GURL BackupUpdateUrl(BackupUpdateReason reason) const;
201
202  // Generates GetHash request URL for retrieving full hashes.
203  GURL GetHashUrl() const;
204  // Generates URL for reporting safe browsing hits for UMA users.
205
206  // Composes a ChunkUrl based on input string.
207  GURL NextChunkUrl(const std::string& input) const;
208
209  // Returns the time for the next update request. If |back_off| is true,
210  // the time returned will increment an error count and return the appriate
211  // next time (see ScheduleNextUpdate below).
212  base::TimeDelta GetNextUpdateInterval(bool back_off);
213
214  // Worker function for calculating GetHash and Update backoff times (in
215  // seconds). |multiplier| is doubled for each consecutive error between the
216  // 2nd and 5th, and |error_count| is incremented with each call.
217  base::TimeDelta GetNextBackOffInterval(int* error_count,
218                                         int* multiplier) const;
219
220  // Manages our update with the next allowable update time. If 'back_off_' is
221  // true, we must decrease the frequency of requests of the SafeBrowsing
222  // service according to section 5 of the protocol specification.
223  // When disable_auto_update_ is set, ScheduleNextUpdate will do nothing.
224  // ForceScheduleNextUpdate has to be called to trigger the update.
225  void ScheduleNextUpdate(bool back_off);
226
227  // Sends a request for a list of chunks we should download to the SafeBrowsing
228  // servers. In order to format this request, we need to send all the chunk
229  // numbers for each list that we have to the server. Getting the chunk numbers
230  // requires a database query (run on the database thread), and the request
231  // is sent upon completion of that query in OnGetChunksComplete.
232  void IssueUpdateRequest();
233
234  // Sends a backup request for a list of chunks to download, when the primary
235  // update request failed. |reason| specifies why the backup is needed. Unlike
236  // the primary IssueUpdateRequest, this does not need to hit the local
237  // SafeBrowsing database since the existing chunk numbers are remembered from
238  // the primary update request. Returns whether the backup request was issued -
239  // this may be false in cases where there is not a prefix specified.
240  bool IssueBackupUpdateRequest(BackupUpdateReason reason);
241
242  // Sends a request for a chunk to the SafeBrowsing servers.
243  void IssueChunkRequest();
244
245  // Formats a string returned from the database into:
246  //   "list_name;a:<add_chunk_ranges>:s:<sub_chunk_ranges>\n"
247  static std::string FormatList(const SBListChunkRanges& list);
248
249  // Runs the protocol parser on received data and update the
250  // SafeBrowsingService with the new content. Returns 'true' on successful
251  // parse, 'false' on error.
252  bool HandleServiceResponse(const GURL& url, const char* data, int length);
253
254  // Updates internal state for each GetHash response error, assuming that the
255  // current time is |now|.
256  void HandleGetHashError(const base::Time& now);
257
258  // Helper function for update completion.
259  void UpdateFinished(bool success);
260  void UpdateFinished(bool success, bool back_off);
261
262  // A callback that runs if we timeout waiting for a response to an update
263  // request. We use this to properly set our update state.
264  void UpdateResponseTimeout();
265
266  // Called after the chunks are added to the database.
267  void OnAddChunksComplete();
268
269 private:
270  // Map of GetHash requests to parameters which created it.
271  struct FullHashDetails {
272    FullHashDetails();
273    FullHashDetails(FullHashCallback callback, bool is_download);
274    ~FullHashDetails();
275
276    FullHashCallback callback;
277    bool is_download;
278  };
279  typedef base::hash_map<const net::URLFetcher*, FullHashDetails> HashRequests;
280
281  // The factory that controls the creation of SafeBrowsingProtocolManager.
282  // This is used by tests.
283  static SBProtocolManagerFactory* factory_;
284
285  // Our delegate.
286  SafeBrowsingProtocolManagerDelegate* delegate_;
287
288  // Current active request (in case we need to cancel) for updates or chunks
289  // from the SafeBrowsing service. We can only have one of these outstanding
290  // at any given time unlike GetHash requests, which are tracked separately.
291  scoped_ptr<net::URLFetcher> request_;
292
293  // The kind of request that is currently in progress.
294  SafeBrowsingRequestType request_type_;
295
296  // The number of HTTP response errors, used for request backoff timing.
297  int update_error_count_;
298  int gethash_error_count_;
299
300  // Multipliers which double (max == 8) for each error after the second.
301  int update_back_off_mult_;
302  int gethash_back_off_mult_;
303
304  // Multiplier between 0 and 1 to spread clients over an interval.
305  float back_off_fuzz_;
306
307  // The list for which we are make a request.
308  std::string list_name_;
309
310  // For managing the next earliest time to query the SafeBrowsing servers for
311  // updates.
312  base::TimeDelta next_update_interval_;
313  base::OneShotTimer<SafeBrowsingProtocolManager> update_timer_;
314
315  // timeout_timer_ is used to interrupt update requests which are taking
316  // too long.
317  base::OneShotTimer<SafeBrowsingProtocolManager> timeout_timer_;
318
319  // All chunk requests that need to be made.
320  std::deque<ChunkUrl> chunk_request_urls_;
321
322  HashRequests hash_requests_;
323
324  // The next scheduled update has special behavior for the first 2 requests.
325  enum UpdateRequestState {
326    FIRST_REQUEST = 0,
327    SECOND_REQUEST,
328    NORMAL_REQUEST
329  };
330  UpdateRequestState update_state_;
331
332  // True if the service has been given an add/sub chunk but it hasn't been
333  // added to the database yet.
334  bool chunk_pending_to_write_;
335
336  // The last time we successfully received an update.
337  base::Time last_update_;
338
339  // While in GetHash backoff, we can't make another GetHash until this time.
340  base::Time next_gethash_time_;
341
342  // Current product version sent in each request.
343  std::string version_;
344
345  // Used for measuring chunk request latency.
346  base::Time chunk_request_start_;
347
348  // Tracks the size of each update (in bytes).
349  int update_size_;
350
351  // The safe browsing client name sent in each request.
352  std::string client_name_;
353
354  // A string that is appended to the end of URLs for download, gethash,
355  // safebrowsing hits and chunk update requests.
356  std::string additional_query_;
357
358  // The context we use to issue network requests.
359  scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
360
361  // URL prefix where browser fetches safebrowsing chunk updates, and hashes.
362  std::string url_prefix_;
363
364  // Backup URL prefixes for updates.
365  std::string backup_url_prefixes_[BACKUP_UPDATE_REASON_MAX];
366
367  // The current reason why the backup update request is happening.
368  BackupUpdateReason backup_update_reason_;
369
370  // Data to POST when doing an update.
371  std::string update_list_data_;
372
373  // When true, protocol manager will not start an update unless
374  // ForceScheduleNextUpdate() is called. This is set for testing purpose.
375  bool disable_auto_update_;
376
377  // ID for URLFetchers for testing.
378  int url_fetcher_id_;
379
380  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager);
381};
382
383// Interface of a factory to create ProtocolManager.  Useful for tests.
384class SBProtocolManagerFactory {
385 public:
386  SBProtocolManagerFactory() {}
387  virtual ~SBProtocolManagerFactory() {}
388  virtual SafeBrowsingProtocolManager* CreateProtocolManager(
389      SafeBrowsingProtocolManagerDelegate* delegate,
390      net::URLRequestContextGetter* request_context_getter,
391      const SafeBrowsingProtocolConfig& config) = 0;
392 private:
393  DISALLOW_COPY_AND_ASSIGN(SBProtocolManagerFactory);
394};
395
396// Delegate interface for the SafeBrowsingProtocolManager.
397class SafeBrowsingProtocolManagerDelegate {
398 public:
399  typedef base::Callback<void(const std::vector<SBListChunkRanges>&, bool)>
400      GetChunksCallback;
401  typedef base::Callback<void(void)> AddChunksCallback;
402
403  virtual ~SafeBrowsingProtocolManagerDelegate();
404
405  // |UpdateStarted()| is called just before the SafeBrowsing update protocol
406  // has begun.
407  virtual void UpdateStarted() = 0;
408
409  // |UpdateFinished()| is called just after the SafeBrowsing update protocol
410  // has completed.
411  virtual void UpdateFinished(bool success) = 0;
412
413  // Wipe out the local database. The SafeBrowsing server can request this.
414  virtual void ResetDatabase() = 0;
415
416  // Retrieve all the local database chunks, and invoke |callback| with the
417  // results. The SafeBrowsingProtocolManagerDelegate must only invoke the
418  // callback if the SafeBrowsingProtocolManager is still alive. Only one call
419  // may be made to GetChunks at a time.
420  virtual void GetChunks(GetChunksCallback callback) = 0;
421
422  // Add new chunks to the database. Invokes |callback| when complete, but must
423  // call at a later time.
424  virtual void AddChunks(const std::string& list, SBChunkList* chunks,
425                         AddChunksCallback callback) = 0;
426
427  // Delete chunks from the database.
428  virtual void DeleteChunks(
429      std::vector<SBChunkDelete>* delete_chunks) = 0;
430};
431
432#endif  // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
433