1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
6#define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
7#pragma once
8
// A class that implements Chrome's interface with the SafeBrowsing protocol.
// The SafeBrowsingProtocolManager handles formatting and making requests of,
// and handling responses from, Google's SafeBrowsing servers. This class uses
// the SafeBrowsingProtocolParser class to do the actual parsing.
13
14#include <deque>
15#include <set>
16#include <string>
17#include <vector>
18
19#include "base/gtest_prod_util.h"
20#include "base/hash_tables.h"
21#include "base/memory/scoped_ptr.h"
22#include "base/time.h"
23#include "base/timer.h"
24#include "chrome/browser/safe_browsing/chunk_range.h"
25#include "chrome/browser/safe_browsing/protocol_parser.h"
26#include "chrome/browser/safe_browsing/safe_browsing_service.h"
27#include "chrome/browser/safe_browsing/safe_browsing_util.h"
28#include "chrome/common/net/url_fetcher.h"
29
// Forward declarations of the net types that this header names only through
// pointers, references, or as a template argument. Declaring
// URLRequestContextGetter here keeps the header from depending on another
// include to introduce the name.
namespace net {
class URLRequestContextGetter;
class URLRequestStatus;
}  // namespace net
33
#if defined(COMPILER_GCC)
// With gcc, a hash_map keyed on URLFetcher pointers needs an explicit hasher
// for that key type (MSVC is fine without one).
namespace __gnu_cxx {
template <>
struct hash<const URLFetcher*> {
  // The hash of a fetcher pointer is simply its address reinterpreted as an
  // integer.
  size_t operator()(const URLFetcher* fetcher) const {
    const size_t address = reinterpret_cast<size_t>(fetcher);
    return address;
  }
};
}  // namespace __gnu_cxx
#endif  // defined(COMPILER_GCC)
46
47class SafeBrowsingProtocolManager;
48// Interface of a factory to create ProtocolManager.  Useful for tests.
49class SBProtocolManagerFactory {
50 public:
51  SBProtocolManagerFactory() {}
52  virtual ~SBProtocolManagerFactory() {}
53  virtual SafeBrowsingProtocolManager* CreateProtocolManager(
54      SafeBrowsingService* sb_service,
55      const std::string& client_name,
56      const std::string& client_key,
57      const std::string& wrapped_key,
58      net::URLRequestContextGetter* request_context_getter,
59      const std::string& info_url_prefix,
60      const std::string& mackey_url_prefix,
61      bool disable_auto_update) = 0;
62 private:
63  DISALLOW_COPY_AND_ASSIGN(SBProtocolManagerFactory);
64};
65
66class SafeBrowsingProtocolManager : public URLFetcher::Delegate {
67  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestBackOffTimes);
68  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestChunkStrings);
69  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestGetHashUrl);
70  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
71                           TestGetHashBackOffTimes);
72  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestMacKeyUrl);
73  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
74                           TestSafeBrowsingHitUrl);
75  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
76                           TestMalwareDetailsUrl);
77  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestNextChunkUrl);
78  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestUpdateUrl);
79  friend class SafeBrowsingServiceTest;
80
81 public:
82  virtual ~SafeBrowsingProtocolManager();
83
84  // Makes the passed |factory| the factory used to instantiate
85  // a SafeBrowsingService. Useful for tests.
86  static void RegisterFactory(SBProtocolManagerFactory* factory) {
87    factory_ = factory;
88  }
89
90  // Create an instance of the safe browsing service.
91  static SafeBrowsingProtocolManager* Create(
92      SafeBrowsingService* sb_service,
93      const std::string& client_name,
94      const std::string& client_key,
95      const std::string& wrapped_key,
96      net::URLRequestContextGetter* request_context_getter,
97      const std::string& info_url_prefix,
98      const std::string& mackey_url_prefix,
99      bool disable_auto_update);
100
101  // Sets up the update schedule and internal state for making periodic requests
102  // of the SafeBrowsing service.
103  virtual void Initialize();
104
105  // URLFetcher::Delegate interface.
106  virtual void OnURLFetchComplete(const URLFetcher* source,
107                                  const GURL& url,
108                                  const net::URLRequestStatus& status,
109                                  int response_code,
110                                  const ResponseCookies& cookies,
111                                  const std::string& data);
112
113  // API used by the SafeBrowsingService for issuing queries. When the results
114  // are available, SafeBrowsingService::HandleGetHashResults is called.
115  virtual void GetFullHash(SafeBrowsingService::SafeBrowsingCheck* check,
116                           const std::vector<SBPrefix>& prefixes);
117
118  // Forces the start of next update after |next_update_msec| in msec.
119  void ForceScheduleNextUpdate(int next_update_msec);
120
121  // Scheduled update callback.
122  void GetNextUpdate();
123
124  // Called by the SafeBrowsingService when our request for a list of all chunks
125  // for each list is done.  If database_error is true, that means the protocol
126  // manager shouldn't fetch updates since they can't be written to disk.  It
127  // should try again later to open the database.
128  void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list,
129                           bool database_error);
130
131  // Called after the chunks that were parsed were inserted in the database.
132  void OnChunkInserted();
133
134  // For UMA users we report to Google when a SafeBrowsing interstitial is shown
135  // to the user.  |threat_type| should be one of the types known by
136  // SafeBrowsingHitUrl.
137  void ReportSafeBrowsingHit(const GURL& malicious_url,
138                             const GURL& page_url,
139                             const GURL& referrer_url,
140                             bool is_subresource,
141                             SafeBrowsingService::UrlCheckResult threat_type,
142                             const std::string& post_data);
143
144  // Users can opt-in on the SafeBrowsing interstitial to send detailed
145  // malware reports. |report| is the serialized report.
146  void ReportMalwareDetails(const std::string& report);
147
148  bool is_initial_request() const { return initial_request_; }
149
150  // The last time we received an update.
151  base::Time last_update() const { return last_update_; }
152
153  // Setter for additional_query_. To make sure the additional_query_ won't
154  // be changed in the middle of an update, caller (e.g.: SafeBrowsingService)
155  // should call this after callbacks triggered in UpdateFinished() or before
156  // IssueUpdateRequest().
157  void set_additional_query(const std::string& query) {
158    additional_query_ = query;
159  }
160  const std::string& additional_query() const {
161    return additional_query_;
162  }
163
164  // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
165  // ORDERING OF THESE VALUES.
166  enum ResultType {
167    // 200 response code means that the server recognized the hash
168    // prefix, while 204 is an empty response indicating that the
169    // server did not recognize it.
170    GET_HASH_STATUS_200,
171    GET_HASH_STATUS_204,
172
173    // Subset of successful responses which returned no full hashes.
174    // This includes the 204 case, and also 200 responses for stale
175    // prefixes (deleted at the server but yet deleted on the client).
176    GET_HASH_FULL_HASH_EMPTY,
177
178    // Subset of successful responses for which one or more of the
179    // full hashes matched (should lead to an interstitial).
180    GET_HASH_FULL_HASH_HIT,
181
182    // Subset of successful responses which weren't empty and have no
183    // matches.  It means that there was a prefix collision which was
184    // cleared up by the full hashes.
185    GET_HASH_FULL_HASH_MISS,
186
187    // Memory space for histograms is determined by the max.  ALWAYS
188    // ADD NEW VALUES BEFORE THIS ONE.
189    GET_HASH_RESULT_MAX
190  };
191
192  // Record a GetHash result. |is_download| indicates if the get
193  // hash is triggered by download related lookup.
194  static void RecordGetHashResult(bool is_download,
195                                  ResultType result_type);
196
197 protected:
198  // Constructs a SafeBrowsingProtocolManager for |sb_service| that issues
199  // network requests using |request_context_getter|. When |disable_auto_update|
200  // is true, protocol manager won't schedule next update until
201  // ForceScheduleNextUpdate is called.
202  SafeBrowsingProtocolManager(
203      SafeBrowsingService* sb_service,
204      const std::string& client_name,
205      const std::string& client_key,
206      const std::string& wrapped_key,
207      net::URLRequestContextGetter* request_context_getter,
208      const std::string& http_url_prefix,
209      const std::string& https_url_prefix,
210      bool disable_auto_update);
211 private:
212  friend class SBProtocolManagerFactoryImpl;
213
214  // Internal API for fetching information from the SafeBrowsing servers. The
215  // GetHash requests are higher priority since they can block user requests
216  // so are handled separately.
217  enum SafeBrowsingRequestType {
218    NO_REQUEST = 0,     // No requests in progress
219    UPDATE_REQUEST,     // Request for redirect URLs
220    CHUNK_REQUEST,      // Request for a specific chunk
221    GETKEY_REQUEST      // Update the client's MAC key
222  };
223
224  // Composes a URL using |prefix|, |method| (e.g.: gethash, download,
225  // newkey, report), |client_name| and |version|. When not empty,
226  // |additional_query| is appended to the URL with an additional "&"
227  // in the front.
228  static std::string ComposeUrl(const std::string& prefix,
229                                const std::string& method,
230                                const std::string& client_name,
231                                const std::string& version,
232                                const std::string& additional_query);
233
234  // Generates Update URL for querying about the latest set of chunk updates.
235  // Append "wrkey=xxx" to the URL when |use_mac| is true.
236  GURL UpdateUrl(bool use_mac) const;
237  // Generates GetHash request URL for retrieving full hashes.
238  // Append "wrkey=xxx" to the URL when |use_mac| is true.
239  GURL GetHashUrl(bool use_mac) const;
240  // Generates new MAC client key request URL.
241  GURL MacKeyUrl() const;
242  // Generates URL for reporting safe browsing hits for UMA users.
243  GURL SafeBrowsingHitUrl(
244      const GURL& malicious_url, const GURL& page_url, const GURL& referrer_url,
245      bool is_subresource,
246      SafeBrowsingService::UrlCheckResult threat_type) const;
247  // Generates URL for reporting malware details for users who opt-in.
248  GURL MalwareDetailsUrl() const;
249
250  // Composes a ChunkUrl based on input string.
251  GURL NextChunkUrl(const std::string& input) const;
252
253  // Returns the time (in milliseconds) for the next update request. If
254  // 'back_off' is true, the time returned will increment an error count and
255  // return the appriate next time (see ScheduleNextUpdate below).
256  int GetNextUpdateTime(bool back_off);
257
258  // Worker function for calculating GetHash and Update backoff times (in
259  // seconds). 'Multiplier' is doubled for each consecutive error between the
260  // 2nd and 5th, and 'error_count' is incremented with each call.
261  int GetNextBackOffTime(int* error_count, int* multiplier);
262
263  // Manages our update with the next allowable update time. If 'back_off_' is
264  // true, we must decrease the frequency of requests of the SafeBrowsing
265  // service according to section 5 of the protocol specification.
266  // When disable_auto_update_ is set, ScheduleNextUpdate will do nothing.
267  // ForceScheduleNextUpdate has to be called to trigger the update.
268  void ScheduleNextUpdate(bool back_off);
269
270  // Sends a request for a list of chunks we should download to the SafeBrowsing
271  // servers. In order to format this request, we need to send all the chunk
272  // numbers for each list that we have to the server. Getting the chunk numbers
273  // requires a database query (run on the database thread), and the request
274  // is sent upon completion of that query in OnGetChunksComplete.
275  void IssueUpdateRequest();
276
277  // Sends a request for a chunk to the SafeBrowsing servers.
278  void IssueChunkRequest();
279
280  // Gets a key from the SafeBrowsing servers for use with MAC. This should only
281  // be called once per client unless the server directly tells us to update.
282  void IssueKeyRequest();
283
284  // Formats a string returned from the database into:
285  //   "list_name;a:<add_chunk_ranges>:s:<sub_chunk_ranges>:mac\n"
286  static std::string FormatList(const SBListChunkRanges& list, bool use_mac);
287
288  // Runs the protocol parser on received data and update the
289  // SafeBrowsingService with the new content. Returns 'true' on successful
290  // parse, 'false' on error.
291  bool HandleServiceResponse(const GURL& url, const char* data, int length);
292
293  // If the SafeBrowsing service wants us to re-key, we clear our key state and
294  // issue the request.
295  void HandleReKey();
296
297  // Updates internal state for each GetHash response error, assuming that the
298  // current time is |now|.
299  void HandleGetHashError(const base::Time& now);
300
301  // Helper function for update completion.
302  void UpdateFinished(bool success);
303
304  // A callback that runs if we timeout waiting for a response to an update
305  // request. We use this to properly set our update state.
306  void UpdateResponseTimeout();
307
308 private:
309  // The factory that controls the creation of SafeBrowsingProtocolManager.
310  // This is used by tests.
311  static SBProtocolManagerFactory* factory_;
312
313  // Main SafeBrowsing interface object.
314  SafeBrowsingService* sb_service_;
315
316  // Current active request (in case we need to cancel) for updates or chunks
317  // from the SafeBrowsing service. We can only have one of these outstanding
318  // at any given time unlike GetHash requests, which are tracked separately.
319  scoped_ptr<URLFetcher> request_;
320
321  // The kind of request that is currently in progress.
322  SafeBrowsingRequestType request_type_;
323
324  // The number of HTTP response errors, used for request backoff timing.
325  int update_error_count_;
326  int gethash_error_count_;
327
328  // Multipliers which double (max == 8) for each error after the second.
329  int update_back_off_mult_;
330  int gethash_back_off_mult_;
331
332  // Multiplier between 0 and 1 to spread clients over an interval.
333  float back_off_fuzz_;
334
335  // The list for which we are make a request.
336  std::string list_name_;
337
338  // For managing the next earliest time to query the SafeBrowsing servers for
339  // updates.
340  int next_update_sec_;
341  base::OneShotTimer<SafeBrowsingProtocolManager> update_timer_;
342
343  // All chunk requests that need to be made, along with their MAC.
344  std::deque<ChunkUrl> chunk_request_urls_;
345
346  // Map of GetHash requests.
347  typedef base::hash_map<const URLFetcher*,
348                         SafeBrowsingService::SafeBrowsingCheck*> HashRequests;
349  HashRequests hash_requests_;
350
351  // The next scheduled update has special behavior for the first 2 requests.
352  enum UpdateRequestState {
353    FIRST_REQUEST = 0,
354    SECOND_REQUEST,
355    NORMAL_REQUEST
356  };
357  UpdateRequestState update_state_;
358
359  // We'll attempt to get keys once per browser session if we don't already have
360  // them. They are not essential to operation, but provide a layer of
361  // verification.
362  bool initial_request_;
363
364  // True if the service has been given an add/sub chunk but it hasn't been
365  // added to the database yet.
366  bool chunk_pending_to_write_;
367
368  // The keys used for MAC. Empty keys mean we aren't using MAC.
369  std::string client_key_;
370  std::string wrapped_key_;
371
372  // The last time we successfully received an update.
373  base::Time last_update_;
374
375  // While in GetHash backoff, we can't make another GetHash until this time.
376  base::Time next_gethash_time_;
377
378  // Current product version sent in each request.
379  std::string version_;
380
381  // Used for measuring chunk request latency.
382  base::Time chunk_request_start_;
383
384  // Tracks the size of each update (in bytes).
385  int update_size_;
386
387  // Track outstanding SafeBrowsing report fetchers for clean up.
388  // We add both "hit" and "detail" fetchers in this set.
389  std::set<const URLFetcher*> safebrowsing_reports_;
390
391  // The safe browsing client name sent in each request.
392  std::string client_name_;
393
394  // A string that is appended to the end of URLs for download, gethash,
395  // newkey, safebrowsing hits and chunk update requests.
396  std::string additional_query_;
397
398  // The context we use to issue network requests.
399  scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
400
401  // URL prefix where browser fetches safebrowsing chunk updates, hashes, and
402  // reports hits to the safebrowsing list for UMA users.
403  std::string http_url_prefix_;
404
405  // URL prefix where browser fetches MAC client key, and reports detailed
406  // malware reports for users who opt-in.
407  std::string https_url_prefix_;
408
409  // When true, protocol manager will not start an update unless
410  // ForceScheduleNextUpdate() is called. This is set for testing purpose.
411  bool disable_auto_update_;
412
413  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager);
414};
415
416#endif  // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
417