protocol_manager.h revision 3f50c38dc070f4bb515c1b64450dae14f316474e
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
6#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
7#pragma once
8
// A class that implements Chrome's interface with the SafeBrowsing protocol.
// The SafeBrowsingProtocolManager handles formatting and making requests of,
// and handling responses from, Google's SafeBrowsing servers. This class uses
// the SafeBrowsingProtocolParser class to do the actual parsing.
13
14#include <deque>
15#include <set>
16#include <string>
17#include <vector>
18
19#include "base/gtest_prod_util.h"
20#include "base/hash_tables.h"
21#include "base/scoped_ptr.h"
22#include "base/time.h"
23#include "base/timer.h"
24#include "chrome/browser/safe_browsing/chunk_range.h"
25#include "chrome/browser/safe_browsing/protocol_parser.h"
26#include "chrome/browser/safe_browsing/safe_browsing_service.h"
27#include "chrome/browser/safe_browsing/safe_browsing_util.h"
28#include "chrome/common/net/url_fetcher.h"
29
// Forward declaration only: this header refers to net::URLRequestStatus
// solely by const reference, so the full definition is not required here.
namespace net {
class URLRequestStatus;
}  // namespace net
33
#if defined(COMPILER_GCC)
// Allows us to use URLFetchers in a hash_map with gcc (MSVC is okay without
// specifying this).
namespace __gnu_cxx {
template<>
struct hash<const URLFetcher*> {
  // Hashes a fetcher by the numeric value of its address.
  size_t operator()(const URLFetcher* fetcher) const {
    return reinterpret_cast<size_t>(fetcher);
  }
};
}  // namespace __gnu_cxx
#endif  // defined(COMPILER_GCC)
46
47class SafeBrowsingProtocolManager;
48// Interface of a factory to create ProtocolManager.  Useful for tests.
49class SBProtocolManagerFactory {
50 public:
51  SBProtocolManagerFactory() {}
52  virtual ~SBProtocolManagerFactory() {}
53  virtual SafeBrowsingProtocolManager* CreateProtocolManager(
54      SafeBrowsingService* sb_service,
55      const std::string& client_name,
56      const std::string& client_key,
57      const std::string& wrapped_key,
58      URLRequestContextGetter* request_context_getter,
59      const std::string& info_url_prefix,
60      const std::string& mackey_url_prefix,
61      bool disable_auto_update) = 0;
62 private:
63  DISALLOW_COPY_AND_ASSIGN(SBProtocolManagerFactory);
64};
65
66class SafeBrowsingProtocolManager : public URLFetcher::Delegate {
67  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestBackOffTimes);
68  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestChunkStrings);
69  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestGetHashUrl);
70  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
71                           TestGetHashBackOffTimes);
72  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestMacKeyUrl);
73  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
74                           TestSafeBrowsingHitUrl);
75  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
76                           TestMalwareDetailsUrl);
77  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestNextChunkUrl);
78  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestUpdateUrl);
79  friend class SafeBrowsingServiceTest;
80
81 public:
82  virtual ~SafeBrowsingProtocolManager();
83
84  // Makes the passed |factory| the factory used to instantiate
85  // a SafeBrowsingService. Useful for tests.
86  static void RegisterFactory(SBProtocolManagerFactory* factory) {
87    factory_ = factory;
88  }
89
90  // Create an instance of the safe browsing service.
91  static SafeBrowsingProtocolManager* Create(
92      SafeBrowsingService* sb_service,
93      const std::string& client_name,
94      const std::string& client_key,
95      const std::string& wrapped_key,
96      URLRequestContextGetter* request_context_getter,
97      const std::string& info_url_prefix,
98      const std::string& mackey_url_prefix,
99      bool disable_auto_update);
100
101  // Sets up the update schedule and internal state for making periodic requests
102  // of the SafeBrowsing service.
103  virtual void Initialize();
104
105  // URLFetcher::Delegate interface.
106  virtual void OnURLFetchComplete(const URLFetcher* source,
107                                  const GURL& url,
108                                  const net::URLRequestStatus& status,
109                                  int response_code,
110                                  const ResponseCookies& cookies,
111                                  const std::string& data);
112
113  // API used by the SafeBrowsingService for issuing queries. When the results
114  // are available, SafeBrowsingService::HandleGetHashResults is called.
115  virtual void GetFullHash(SafeBrowsingService::SafeBrowsingCheck* check,
116                           const std::vector<SBPrefix>& prefixes);
117
118  // Forces the start of next update after |next_update_msec| in msec.
119  void ForceScheduleNextUpdate(int next_update_msec);
120
121  // Scheduled update callback.
122  void GetNextUpdate();
123
124  // Called by the SafeBrowsingService when our request for a list of all chunks
125  // for each list is done.  If database_error is true, that means the protocol
126  // manager shouldn't fetch updates since they can't be written to disk.  It
127  // should try again later to open the database.
128  void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list,
129                           bool database_error);
130
131  // Called after the chunks that were parsed were inserted in the database.
132  void OnChunkInserted();
133
134  // For UMA users we report to Google when a SafeBrowsing interstitial is shown
135  // to the user.  We assume that the threat type is either URL_MALWARE or
136  // URL_PHISHING.
137  void ReportSafeBrowsingHit(const GURL& malicious_url,
138                             const GURL& page_url,
139                             const GURL& referrer_url,
140                             bool is_subresource,
141                             SafeBrowsingService::UrlCheckResult threat_type);
142
143  // Users can opt-in on the SafeBrowsing interstitial to send detailed
144  // malware reports. |report| is the serialized report.
145  void ReportMalwareDetails(const std::string& report);
146
147  bool is_initial_request() const { return initial_request_; }
148
149  // The last time we received an update.
150  base::Time last_update() const { return last_update_; }
151
152  // Setter for additional_query_. To make sure the additional_query_ won't
153  // be changed in the middle of an update, caller (e.g.: SafeBrowsingService)
154  // should call this after callbacks triggered in UpdateFinished() or before
155  // IssueUpdateRequest().
156  void set_additional_query(const std::string& query) {
157    additional_query_ = query;
158  }
159  const std::string& additional_query() const {
160    return additional_query_;
161  }
162
163  // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
164  // ORDERING OF THESE VALUES.
165  enum ResultType {
166    // 200 response code means that the server recognized the hash
167    // prefix, while 204 is an empty response indicating that the
168    // server did not recognize it.
169    GET_HASH_STATUS_200,
170    GET_HASH_STATUS_204,
171
172    // Subset of successful responses which returned no full hashes.
173    // This includes the 204 case, and also 200 responses for stale
174    // prefixes (deleted at the server but yet deleted on the client).
175    GET_HASH_FULL_HASH_EMPTY,
176
177    // Subset of successful responses for which one or more of the
178    // full hashes matched (should lead to an interstitial).
179    GET_HASH_FULL_HASH_HIT,
180
181    // Subset of successful responses which weren't empty and have no
182    // matches.  It means that there was a prefix collision which was
183    // cleared up by the full hashes.
184    GET_HASH_FULL_HASH_MISS,
185
186    // Memory space for histograms is determined by the max.  ALWAYS
187    // ADD NEW VALUES BEFORE THIS ONE.
188    GET_HASH_RESULT_MAX
189  };
190
191  // Record a GetHash result.
192  static void RecordGetHashResult(ResultType result_type);
193
194 protected:
195  // Constructs a SafeBrowsingProtocolManager for |sb_service| that issues
196  // network requests using |request_context_getter|. When |disable_auto_update|
197  // is true, protocol manager won't schedule next update until
198  // ForceScheduleNextUpdate is called.
199  SafeBrowsingProtocolManager(SafeBrowsingService* sb_service,
200                              const std::string& client_name,
201                              const std::string& client_key,
202                              const std::string& wrapped_key,
203                              URLRequestContextGetter* request_context_getter,
204                              const std::string& http_url_prefix,
205                              const std::string& https_url_prefix,
206                              bool disable_auto_update);
207 private:
208  friend class SBProtocolManagerFactoryImpl;
209
210  // Internal API for fetching information from the SafeBrowsing servers. The
211  // GetHash requests are higher priority since they can block user requests
212  // so are handled separately.
213  enum SafeBrowsingRequestType {
214    NO_REQUEST = 0,     // No requests in progress
215    UPDATE_REQUEST,     // Request for redirect URLs
216    CHUNK_REQUEST,      // Request for a specific chunk
217    GETKEY_REQUEST      // Update the client's MAC key
218  };
219
220  // Composes a URL using |prefix|, |method| (e.g.: gethash, download,
221  // newkey, report), |client_name| and |version|. When not empty,
222  // |additional_query| is appended to the URL with an additional "&"
223  // in the front.
224  static std::string ComposeUrl(const std::string& prefix,
225                                const std::string& method,
226                                const std::string& client_name,
227                                const std::string& version,
228                                const std::string& additional_query);
229
230  // Generates Update URL for querying about the latest set of chunk updates.
231  // Append "wrkey=xxx" to the URL when |use_mac| is true.
232  GURL UpdateUrl(bool use_mac) const;
233  // Generates GetHash request URL for retrieving full hashes.
234  // Append "wrkey=xxx" to the URL when |use_mac| is true.
235  GURL GetHashUrl(bool use_mac) const;
236  // Generates new MAC client key request URL.
237  GURL MacKeyUrl() const;
238  // Generates URL for reporting safe browsing hits for UMA users.
239  GURL SafeBrowsingHitUrl(
240      const GURL& malicious_url, const GURL& page_url, const GURL& referrer_url,
241      bool is_subresource,
242      SafeBrowsingService::UrlCheckResult threat_type) const;
243  // Generates URL for reporting malware details for users who opt-in.
244  GURL MalwareDetailsUrl() const;
245
246  // Composes a ChunkUrl based on input string.
247  GURL NextChunkUrl(const std::string& input) const;
248
249  // Returns the time (in milliseconds) for the next update request. If
250  // 'back_off' is true, the time returned will increment an error count and
251  // return the appriate next time (see ScheduleNextUpdate below).
252  int GetNextUpdateTime(bool back_off);
253
254  // Worker function for calculating GetHash and Update backoff times (in
255  // seconds). 'Multiplier' is doubled for each consecutive error between the
256  // 2nd and 5th, and 'error_count' is incremented with each call.
257  int GetNextBackOffTime(int* error_count, int* multiplier);
258
259  // Manages our update with the next allowable update time. If 'back_off_' is
260  // true, we must decrease the frequency of requests of the SafeBrowsing
261  // service according to section 5 of the protocol specification.
262  // When disable_auto_update_ is set, ScheduleNextUpdate will do nothing.
263  // ForceScheduleNextUpdate has to be called to trigger the update.
264  void ScheduleNextUpdate(bool back_off);
265
266  // Sends a request for a list of chunks we should download to the SafeBrowsing
267  // servers. In order to format this request, we need to send all the chunk
268  // numbers for each list that we have to the server. Getting the chunk numbers
269  // requires a database query (run on the database thread), and the request
270  // is sent upon completion of that query in OnGetChunksComplete.
271  void IssueUpdateRequest();
272
273  // Sends a request for a chunk to the SafeBrowsing servers.
274  void IssueChunkRequest();
275
276  // Gets a key from the SafeBrowsing servers for use with MAC. This should only
277  // be called once per client unless the server directly tells us to update.
278  void IssueKeyRequest();
279
280  // Formats a string returned from the database into:
281  //   "list_name;a:<add_chunk_ranges>:s:<sub_chunk_ranges>:mac\n"
282  static std::string FormatList(const SBListChunkRanges& list, bool use_mac);
283
284  // Runs the protocol parser on received data and update the
285  // SafeBrowsingService with the new content. Returns 'true' on successful
286  // parse, 'false' on error.
287  bool HandleServiceResponse(const GURL& url, const char* data, int length);
288
289  // If the SafeBrowsing service wants us to re-key, we clear our key state and
290  // issue the request.
291  void HandleReKey();
292
293  // Updates internal state for each GetHash response error, assuming that the
294  // current time is |now|.
295  void HandleGetHashError(const base::Time& now);
296
297  // Helper function for update completion.
298  void UpdateFinished(bool success);
299
300  // A callback that runs if we timeout waiting for a response to an update
301  // request. We use this to properly set our update state.
302  void UpdateResponseTimeout();
303
304 private:
305  // The factory that controls the creation of SafeBrowsingProtocolManager.
306  // This is used by tests.
307  static SBProtocolManagerFactory* factory_;
308
309  // Main SafeBrowsing interface object.
310  SafeBrowsingService* sb_service_;
311
312  // Current active request (in case we need to cancel) for updates or chunks
313  // from the SafeBrowsing service. We can only have one of these outstanding
314  // at any given time unlike GetHash requests, which are tracked separately.
315  scoped_ptr<URLFetcher> request_;
316
317  // The kind of request that is currently in progress.
318  SafeBrowsingRequestType request_type_;
319
320  // The number of HTTP response errors, used for request backoff timing.
321  int update_error_count_;
322  int gethash_error_count_;
323
324  // Multipliers which double (max == 8) for each error after the second.
325  int update_back_off_mult_;
326  int gethash_back_off_mult_;
327
328  // Multiplier between 0 and 1 to spread clients over an interval.
329  float back_off_fuzz_;
330
331  // The list for which we are make a request.
332  std::string list_name_;
333
334  // For managing the next earliest time to query the SafeBrowsing servers for
335  // updates.
336  int next_update_sec_;
337  base::OneShotTimer<SafeBrowsingProtocolManager> update_timer_;
338
339  // All chunk requests that need to be made, along with their MAC.
340  std::deque<ChunkUrl> chunk_request_urls_;
341
342  // Map of GetHash requests.
343  typedef base::hash_map<const URLFetcher*,
344                         SafeBrowsingService::SafeBrowsingCheck*> HashRequests;
345  HashRequests hash_requests_;
346
347  // The next scheduled update has special behavior for the first 2 requests.
348  enum UpdateRequestState {
349    FIRST_REQUEST = 0,
350    SECOND_REQUEST,
351    NORMAL_REQUEST
352  };
353  UpdateRequestState update_state_;
354
355  // We'll attempt to get keys once per browser session if we don't already have
356  // them. They are not essential to operation, but provide a layer of
357  // verification.
358  bool initial_request_;
359
360  // True if the service has been given an add/sub chunk but it hasn't been
361  // added to the database yet.
362  bool chunk_pending_to_write_;
363
364  // The keys used for MAC. Empty keys mean we aren't using MAC.
365  std::string client_key_;
366  std::string wrapped_key_;
367
368  // The last time we successfully received an update.
369  base::Time last_update_;
370
371  // While in GetHash backoff, we can't make another GetHash until this time.
372  base::Time next_gethash_time_;
373
374  // Current product version sent in each request.
375  std::string version_;
376
377  // Used for measuring chunk request latency.
378  base::Time chunk_request_start_;
379
380  // Tracks the size of each update (in bytes).
381  int update_size_;
382
383  // Track outstanding SafeBrowsing report fetchers for clean up.
384  // We add both "hit" and "detail" fetchers in this set.
385  std::set<const URLFetcher*> safebrowsing_reports_;
386
387  // The safe browsing client name sent in each request.
388  std::string client_name_;
389
390  // A string that is appended to the end of URLs for download, gethash,
391  // newkey, safebrowsing hits and chunk update requests.
392  std::string additional_query_;
393
394  // The context we use to issue network requests.
395  scoped_refptr<URLRequestContextGetter> request_context_getter_;
396
397  // URL prefix where browser fetches safebrowsing chunk updates, hashes, and
398  // reports hits to the safebrowsing list for UMA users.
399  std::string http_url_prefix_;
400
401  // URL prefix where browser fetches MAC client key, and reports detailed
402  // malware reports for users who opt-in.
403  std::string https_url_prefix_;
404
405  // When true, protocol manager will not start an update unless
406  // ForceScheduleNextUpdate() is called. This is set for testing purpose.
407  bool disable_auto_update_;
408
409  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager);
410};
411
412#endif  // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
413