protocol_manager.h revision dc0f95d653279beabeb9817299e2902918ba123e
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
6#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
7#pragma once
8
// A class that implements Chrome's interface with the SafeBrowsing protocol.
// The SafeBrowsingProtocolManager handles formatting and making requests of,
// and handling responses from, Google's SafeBrowsing servers. This class uses
// the SafeBrowsingProtocolParser class to do the actual parsing.
13
14#include <deque>
15#include <set>
16#include <string>
17#include <vector>
18
19#include "base/gtest_prod_util.h"
20#include "base/hash_tables.h"
21#include "base/scoped_ptr.h"
22#include "base/time.h"
23#include "base/timer.h"
24#include "chrome/browser/safe_browsing/chunk_range.h"
25#include "chrome/browser/safe_browsing/protocol_parser.h"
26#include "chrome/browser/safe_browsing/safe_browsing_service.h"
27#include "chrome/browser/safe_browsing/safe_browsing_util.h"
28#include "chrome/common/net/url_fetcher.h"
29
30namespace net {
31class URLRequestStatus;
32}  // namespace net
33
#if defined(COMPILER_GCC)
// Allows us to use URLFetchers in a hash_map with gcc (MSVC is okay without
// specifying this).
namespace __gnu_cxx {

// Hash functor for |const URLFetcher*| keys. The hash value is simply the
// pointer's address reinterpreted as an integer, so two keys hash equally
// exactly when they point at the same fetcher object.
template<>
struct hash<const URLFetcher*> {
  size_t operator()(const URLFetcher* fetcher) const {
    return reinterpret_cast<size_t>(fetcher);
  }
};

}  // namespace __gnu_cxx
#endif  // defined(COMPILER_GCC)
46
47class SafeBrowsingProtocolManager;
48// Interface of a factory to create ProtocolManager.  Useful for tests.
49class SBProtocolManagerFactory {
50 public:
51  SBProtocolManagerFactory() {}
52  virtual ~SBProtocolManagerFactory() {}
53  virtual SafeBrowsingProtocolManager* CreateProtocolManager(
54      SafeBrowsingService* sb_service,
55      const std::string& client_name,
56      const std::string& client_key,
57      const std::string& wrapped_key,
58      URLRequestContextGetter* request_context_getter,
59      const std::string& info_url_prefix,
60      const std::string& mackey_url_prefix,
61      bool disable_auto_update) = 0;
62 private:
63  DISALLOW_COPY_AND_ASSIGN(SBProtocolManagerFactory);
64};
65
66class SafeBrowsingProtocolManager : public URLFetcher::Delegate {
67  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestBackOffTimes);
68  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestChunkStrings);
69  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestGetHashUrl);
70  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
71                           TestGetHashBackOffTimes);
72  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestMacKeyUrl);
73  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
74                           TestSafeBrowsingHitUrl);
75  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
76                           TestMalwareDetailsUrl);
77  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestNextChunkUrl);
78  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestUpdateUrl);
79  friend class SafeBrowsingServiceTest;
80
81 public:
82  virtual ~SafeBrowsingProtocolManager();
83
84  // Makes the passed |factory| the factory used to instantiate
85  // a SafeBrowsingService. Useful for tests.
86  static void RegisterFactory(SBProtocolManagerFactory* factory) {
87    factory_ = factory;
88  }
89
90  // Create an instance of the safe browsing service.
91  static SafeBrowsingProtocolManager* Create(
92      SafeBrowsingService* sb_service,
93      const std::string& client_name,
94      const std::string& client_key,
95      const std::string& wrapped_key,
96      URLRequestContextGetter* request_context_getter,
97      const std::string& info_url_prefix,
98      const std::string& mackey_url_prefix,
99      bool disable_auto_update);
100
101  // Sets up the update schedule and internal state for making periodic requests
102  // of the SafeBrowsing service.
103  virtual void Initialize();
104
105  // URLFetcher::Delegate interface.
106  virtual void OnURLFetchComplete(const URLFetcher* source,
107                                  const GURL& url,
108                                  const net::URLRequestStatus& status,
109                                  int response_code,
110                                  const ResponseCookies& cookies,
111                                  const std::string& data);
112
113  // API used by the SafeBrowsingService for issuing queries. When the results
114  // are available, SafeBrowsingService::HandleGetHashResults is called.
115  virtual void GetFullHash(SafeBrowsingService::SafeBrowsingCheck* check,
116                           const std::vector<SBPrefix>& prefixes);
117
118  // Forces the start of next update after |next_update_msec| in msec.
119  void ForceScheduleNextUpdate(int next_update_msec);
120
121  // Scheduled update callback.
122  void GetNextUpdate();
123
124  // Called by the SafeBrowsingService when our request for a list of all chunks
125  // for each list is done.  If database_error is true, that means the protocol
126  // manager shouldn't fetch updates since they can't be written to disk.  It
127  // should try again later to open the database.
128  void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list,
129                           bool database_error);
130
131  // Called after the chunks that were parsed were inserted in the database.
132  void OnChunkInserted();
133
134  // For UMA users we report to Google when a SafeBrowsing interstitial is shown
135  // to the user.  We assume that the threat type is either URL_MALWARE or
136  // URL_PHISHING.
137  void ReportSafeBrowsingHit(const GURL& malicious_url,
138                             const GURL& page_url,
139                             const GURL& referrer_url,
140                             bool is_subresource,
141                             SafeBrowsingService::UrlCheckResult threat_type);
142
143  // Users can opt-in on the SafeBrowsing interstitial to send detailed
144  // malware reports. |report| is the serialized report.
145  void ReportMalwareDetails(const std::string& report);
146
147  bool is_initial_request() const { return initial_request_; }
148
149  // The last time we received an update.
150  base::Time last_update() const { return last_update_; }
151
152  // Setter for additional_query_. To make sure the additional_query_ won't
153  // be changed in the middle of an update, caller (e.g.: SafeBrowsingService)
154  // should call this after callbacks triggered in UpdateFinished() or before
155  // IssueUpdateRequest().
156  void set_additional_query(const std::string& query) {
157    additional_query_ = query;
158  }
159  const std::string& additional_query() const {
160    return additional_query_;
161  }
162
163  // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
164  // ORDERING OF THESE VALUES.
165  enum ResultType {
166    // 200 response code means that the server recognized the hash
167    // prefix, while 204 is an empty response indicating that the
168    // server did not recognize it.
169    GET_HASH_STATUS_200,
170    GET_HASH_STATUS_204,
171
172    // Subset of successful responses which returned no full hashes.
173    // This includes the 204 case, and also 200 responses for stale
174    // prefixes (deleted at the server but yet deleted on the client).
175    GET_HASH_FULL_HASH_EMPTY,
176
177    // Subset of successful responses for which one or more of the
178    // full hashes matched (should lead to an interstitial).
179    GET_HASH_FULL_HASH_HIT,
180
181    // Subset of successful responses which weren't empty and have no
182    // matches.  It means that there was a prefix collision which was
183    // cleared up by the full hashes.
184    GET_HASH_FULL_HASH_MISS,
185
186    // Memory space for histograms is determined by the max.  ALWAYS
187    // ADD NEW VALUES BEFORE THIS ONE.
188    GET_HASH_RESULT_MAX
189  };
190
191  // Record a GetHash result. |is_download| indicates if the get
192  // hash is triggered by download related lookup.
193  static void RecordGetHashResult(bool is_download,
194                                  ResultType result_type);
195
196 protected:
197  // Constructs a SafeBrowsingProtocolManager for |sb_service| that issues
198  // network requests using |request_context_getter|. When |disable_auto_update|
199  // is true, protocol manager won't schedule next update until
200  // ForceScheduleNextUpdate is called.
201  SafeBrowsingProtocolManager(SafeBrowsingService* sb_service,
202                              const std::string& client_name,
203                              const std::string& client_key,
204                              const std::string& wrapped_key,
205                              URLRequestContextGetter* request_context_getter,
206                              const std::string& http_url_prefix,
207                              const std::string& https_url_prefix,
208                              bool disable_auto_update);
209 private:
210  friend class SBProtocolManagerFactoryImpl;
211
212  // Internal API for fetching information from the SafeBrowsing servers. The
213  // GetHash requests are higher priority since they can block user requests
214  // so are handled separately.
215  enum SafeBrowsingRequestType {
216    NO_REQUEST = 0,     // No requests in progress
217    UPDATE_REQUEST,     // Request for redirect URLs
218    CHUNK_REQUEST,      // Request for a specific chunk
219    GETKEY_REQUEST      // Update the client's MAC key
220  };
221
222  // Composes a URL using |prefix|, |method| (e.g.: gethash, download,
223  // newkey, report), |client_name| and |version|. When not empty,
224  // |additional_query| is appended to the URL with an additional "&"
225  // in the front.
226  static std::string ComposeUrl(const std::string& prefix,
227                                const std::string& method,
228                                const std::string& client_name,
229                                const std::string& version,
230                                const std::string& additional_query);
231
232  // Generates Update URL for querying about the latest set of chunk updates.
233  // Append "wrkey=xxx" to the URL when |use_mac| is true.
234  GURL UpdateUrl(bool use_mac) const;
235  // Generates GetHash request URL for retrieving full hashes.
236  // Append "wrkey=xxx" to the URL when |use_mac| is true.
237  GURL GetHashUrl(bool use_mac) const;
238  // Generates new MAC client key request URL.
239  GURL MacKeyUrl() const;
240  // Generates URL for reporting safe browsing hits for UMA users.
241  GURL SafeBrowsingHitUrl(
242      const GURL& malicious_url, const GURL& page_url, const GURL& referrer_url,
243      bool is_subresource,
244      SafeBrowsingService::UrlCheckResult threat_type) const;
245  // Generates URL for reporting malware details for users who opt-in.
246  GURL MalwareDetailsUrl() const;
247
248  // Composes a ChunkUrl based on input string.
249  GURL NextChunkUrl(const std::string& input) const;
250
251  // Returns the time (in milliseconds) for the next update request. If
252  // 'back_off' is true, the time returned will increment an error count and
253  // return the appriate next time (see ScheduleNextUpdate below).
254  int GetNextUpdateTime(bool back_off);
255
256  // Worker function for calculating GetHash and Update backoff times (in
257  // seconds). 'Multiplier' is doubled for each consecutive error between the
258  // 2nd and 5th, and 'error_count' is incremented with each call.
259  int GetNextBackOffTime(int* error_count, int* multiplier);
260
261  // Manages our update with the next allowable update time. If 'back_off_' is
262  // true, we must decrease the frequency of requests of the SafeBrowsing
263  // service according to section 5 of the protocol specification.
264  // When disable_auto_update_ is set, ScheduleNextUpdate will do nothing.
265  // ForceScheduleNextUpdate has to be called to trigger the update.
266  void ScheduleNextUpdate(bool back_off);
267
268  // Sends a request for a list of chunks we should download to the SafeBrowsing
269  // servers. In order to format this request, we need to send all the chunk
270  // numbers for each list that we have to the server. Getting the chunk numbers
271  // requires a database query (run on the database thread), and the request
272  // is sent upon completion of that query in OnGetChunksComplete.
273  void IssueUpdateRequest();
274
275  // Sends a request for a chunk to the SafeBrowsing servers.
276  void IssueChunkRequest();
277
278  // Gets a key from the SafeBrowsing servers for use with MAC. This should only
279  // be called once per client unless the server directly tells us to update.
280  void IssueKeyRequest();
281
282  // Formats a string returned from the database into:
283  //   "list_name;a:<add_chunk_ranges>:s:<sub_chunk_ranges>:mac\n"
284  static std::string FormatList(const SBListChunkRanges& list, bool use_mac);
285
286  // Runs the protocol parser on received data and update the
287  // SafeBrowsingService with the new content. Returns 'true' on successful
288  // parse, 'false' on error.
289  bool HandleServiceResponse(const GURL& url, const char* data, int length);
290
291  // If the SafeBrowsing service wants us to re-key, we clear our key state and
292  // issue the request.
293  void HandleReKey();
294
295  // Updates internal state for each GetHash response error, assuming that the
296  // current time is |now|.
297  void HandleGetHashError(const base::Time& now);
298
299  // Helper function for update completion.
300  void UpdateFinished(bool success);
301
302  // A callback that runs if we timeout waiting for a response to an update
303  // request. We use this to properly set our update state.
304  void UpdateResponseTimeout();
305
306 private:
307  // The factory that controls the creation of SafeBrowsingProtocolManager.
308  // This is used by tests.
309  static SBProtocolManagerFactory* factory_;
310
311  // Main SafeBrowsing interface object.
312  SafeBrowsingService* sb_service_;
313
314  // Current active request (in case we need to cancel) for updates or chunks
315  // from the SafeBrowsing service. We can only have one of these outstanding
316  // at any given time unlike GetHash requests, which are tracked separately.
317  scoped_ptr<URLFetcher> request_;
318
319  // The kind of request that is currently in progress.
320  SafeBrowsingRequestType request_type_;
321
322  // The number of HTTP response errors, used for request backoff timing.
323  int update_error_count_;
324  int gethash_error_count_;
325
326  // Multipliers which double (max == 8) for each error after the second.
327  int update_back_off_mult_;
328  int gethash_back_off_mult_;
329
330  // Multiplier between 0 and 1 to spread clients over an interval.
331  float back_off_fuzz_;
332
333  // The list for which we are make a request.
334  std::string list_name_;
335
336  // For managing the next earliest time to query the SafeBrowsing servers for
337  // updates.
338  int next_update_sec_;
339  base::OneShotTimer<SafeBrowsingProtocolManager> update_timer_;
340
341  // All chunk requests that need to be made, along with their MAC.
342  std::deque<ChunkUrl> chunk_request_urls_;
343
344  // Map of GetHash requests.
345  typedef base::hash_map<const URLFetcher*,
346                         SafeBrowsingService::SafeBrowsingCheck*> HashRequests;
347  HashRequests hash_requests_;
348
349  // The next scheduled update has special behavior for the first 2 requests.
350  enum UpdateRequestState {
351    FIRST_REQUEST = 0,
352    SECOND_REQUEST,
353    NORMAL_REQUEST
354  };
355  UpdateRequestState update_state_;
356
357  // We'll attempt to get keys once per browser session if we don't already have
358  // them. They are not essential to operation, but provide a layer of
359  // verification.
360  bool initial_request_;
361
362  // True if the service has been given an add/sub chunk but it hasn't been
363  // added to the database yet.
364  bool chunk_pending_to_write_;
365
366  // The keys used for MAC. Empty keys mean we aren't using MAC.
367  std::string client_key_;
368  std::string wrapped_key_;
369
370  // The last time we successfully received an update.
371  base::Time last_update_;
372
373  // While in GetHash backoff, we can't make another GetHash until this time.
374  base::Time next_gethash_time_;
375
376  // Current product version sent in each request.
377  std::string version_;
378
379  // Used for measuring chunk request latency.
380  base::Time chunk_request_start_;
381
382  // Tracks the size of each update (in bytes).
383  int update_size_;
384
385  // Track outstanding SafeBrowsing report fetchers for clean up.
386  // We add both "hit" and "detail" fetchers in this set.
387  std::set<const URLFetcher*> safebrowsing_reports_;
388
389  // The safe browsing client name sent in each request.
390  std::string client_name_;
391
392  // A string that is appended to the end of URLs for download, gethash,
393  // newkey, safebrowsing hits and chunk update requests.
394  std::string additional_query_;
395
396  // The context we use to issue network requests.
397  scoped_refptr<URLRequestContextGetter> request_context_getter_;
398
399  // URL prefix where browser fetches safebrowsing chunk updates, hashes, and
400  // reports hits to the safebrowsing list for UMA users.
401  std::string http_url_prefix_;
402
403  // URL prefix where browser fetches MAC client key, and reports detailed
404  // malware reports for users who opt-in.
405  std::string https_url_prefix_;
406
407  // When true, protocol manager will not start an update unless
408  // ForceScheduleNextUpdate() is called. This is set for testing purpose.
409  bool disable_auto_update_;
410
411  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager);
412};
413
414#endif  // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
415