1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// The Safe Browsing service is responsible for downloading anti-phishing and
6// anti-malware tables and checking urls against them.
7
8#ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
9#define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
10
11#include <deque>
12#include <map>
13#include <set>
14#include <string>
15#include <vector>
16
17#include "base/callback.h"
18#include "base/containers/hash_tables.h"
19#include "base/memory/ref_counted.h"
20#include "base/memory/scoped_ptr.h"
21#include "base/synchronization/lock.h"
22#include "base/time/time.h"
23#include "chrome/browser/safe_browsing/protocol_manager.h"
24#include "chrome/browser/safe_browsing/safe_browsing_util.h"
25#include "url/gurl.h"
26
// Forward declarations.  Within this header SafeBrowsingService,
// SafeBrowsingDatabase and base::Thread are only named through pointer or
// smart-pointer types.
27class SafeBrowsingService;
28class SafeBrowsingDatabase;
29
30namespace base {
31class Thread;
32}  // namespace base
33
// NOTE(review): the net:: and safe_browsing:: declarations below are not
// referenced anywhere else in this header; they may exist only for the benefit
// of files that include it -- confirm before removing.
34namespace net {
35class URLRequestContext;
36class URLRequestContextGetter;
37}  // namespace net
38
39namespace safe_browsing {
40class ClientSideDetectionService;
41class DownloadProtectionService;
42}  // namespace safe_browsing
43
44// Construction needs to happen on the main thread.
// Performs SafeBrowsing lookups (browse URLs, download URLs, extension IDs,
// whitelists and the malware IP blacklist) on behalf of Client objects, and
// implements SafeBrowsingProtocolManagerDelegate to receive database update
// requests.  Instances are ref-counted (base::RefCountedThreadSafe); the
// destructor is not public.
45class SafeBrowsingDatabaseManager
46    : public base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>,
47      public SafeBrowsingProtocolManagerDelegate {
48 public:
49  class Client;
50
51  // Bundle of SafeBrowsing state while performing a URL or hash prefix check.
52  struct SafeBrowsingCheck {
53    // |check_type| should correspond to the type of item that is being
54    // checked, either a URL or a binary hash/URL. We store this for two
55    // purposes: to know which of Client's methods to call when a result is
56    // known, and for logging purposes. It *isn't* used to predict the response
57    // list type; that information is given to us by the server.
58    SafeBrowsingCheck(const std::vector<GURL>& urls,
59                      const std::vector<SBFullHash>& full_hashes,
60                      Client* client,
61                      safe_browsing_util::ListType check_type,
62                      const std::vector<SBThreatType>& expected_threats);
63    ~SafeBrowsingCheck();
64
65    // Either |urls| or |full_hashes| is used to look up the database.
66    // |*_results| are parallel vectors containing the results. They are
67    // initialized to contain SB_THREAT_TYPE_SAFE.
68    std::vector<GURL> urls;
69    std::vector<SBThreatType> url_results;
70    std::vector<std::string> url_metadata;
71    std::vector<SBFullHash> full_hashes;
72    std::vector<SBThreatType> full_hash_results;
73
    // The client to notify with the result.  Held as a raw pointer.
74    Client* client;
75    bool need_get_hash;
76    base::TimeTicks start;  // When check was sent to SB service.
77    safe_browsing_util::ListType check_type;  // See comment in constructor.
78    std::vector<SBThreatType> expected_threats;
79    std::vector<SBPrefix> prefix_hits;
80    std::vector<SBFullHashResult> cache_hits;
81
82    // Vends weak pointers for TimeoutCallback().  If the response is
83    // received before the timeout fires, the factory is destructed and
84    // the timeout won't be fired.
85    // TODO(lzheng): We should consider using this timeout check
86    // for browsing too (instead of implementing it in
87    // safe_browsing_resource_handler.cc).
88    scoped_ptr<base::WeakPtrFactory<
89        SafeBrowsingDatabaseManager> > timeout_factory_;
90
91   private:
92    DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck);
93  };
94
  // Interface for objects that receive the results of checks.  Passed as
  // |client| to the Check*() methods below.
95  class Client {
96   public:
    // Reports the outcome of |check| to this client.
    // NOTE(review): presumably dispatches to one of the protected On*Result()
    // hooks below based on |check.check_type| -- confirm in the .cc file.
97    void OnSafeBrowsingResult(const SafeBrowsingCheck& check);
98
99   protected:
100    virtual ~Client() {}
101
102    // Called when the result of checking a browse URL is known.
103    virtual void OnCheckBrowseUrlResult(const GURL& url,
104                                        SBThreatType threat_type,
105                                        const std::string& metadata) {}
106
107    // Called when the result of checking a download URL is known.
108    virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain,
109                                          SBThreatType threat_type) {}
110
111    // Called when the result of checking a set of extensions is known.
112    virtual void OnCheckExtensionsResult(
113        const std::set<std::string>& threats) {}
114  };
115
116  // Creates the database manager.  Needs to be initialized before use.
117  explicit SafeBrowsingDatabaseManager(
118      const scoped_refptr<SafeBrowsingService>& service);
119
120  // Returns true if the url's scheme can be checked.
121  bool CanCheckUrl(const GURL& url) const;
122
123  // Returns whether download protection is enabled.
124  bool download_protection_enabled() const {
125    return enable_download_protection_;
126  }
127
128  // Called on the IO thread to check if the given url is safe or not.  If we
129  // can synchronously determine that the url is safe, CheckBrowseUrl returns
130  // true.  Otherwise it returns false, and |client| is called asynchronously
131  // with the result when it is ready.
132  virtual bool CheckBrowseUrl(const GURL& url, Client* client);
133
134  // Check if the prefix for |url| is in safebrowsing download add lists.
135  // Result will be passed to callback in |client|.
  // NOTE(review): the return value is undocumented; presumably it mirrors
  // CheckBrowseUrl (true == synchronously known to be safe) -- confirm.
136  virtual bool CheckDownloadUrl(const std::vector<GURL>& url_chain,
137                                Client* client);
138
139  // Check which prefixes in |extension_ids| are in the safebrowsing blacklist.
140  // Returns true if none are, false if further checks need to be made, in
141  // which case the result will be passed to |client|.
142  virtual bool CheckExtensionIDs(const std::set<std::string>& extension_ids,
143                                 Client* client);
144
145  // Check if the given url is on the side-effect free whitelist.
146  // Can be called on any thread. Returns false if the check cannot be performed
147  // (e.g. because we are disabled or because of an invalid scheme in the URL).
148  // Otherwise, returns true if the URL is on the whitelist based on matching
149  // the hash prefix only (so there may be false positives).
150  virtual bool CheckSideEffectFreeWhitelistUrl(const GURL& url);
151
152  // Check if the |url| matches any of the full-length hashes from the
153  // client-side phishing detection whitelist.  Returns true if there was a
154  // match and false otherwise.  To make sure we are conservative we will return
155  // true if an error occurs. This method is expected to be called on the IO
156  // thread.
157  virtual bool MatchCsdWhitelistUrl(const GURL& url);
158
159  // Check if the given IP address (either IPv4 or IPv6) matches the malware
160  // IP blacklist.
161  virtual bool MatchMalwareIP(const std::string& ip_address);
162
163  // Check if the |url| matches any of the full-length hashes from the
164  // download whitelist.  Returns true if there was a match and false otherwise.
165  // To make sure we are conservative we will return true if an error occurs.
166  // This method is expected to be called on the IO thread.
167  virtual bool MatchDownloadWhitelistUrl(const GURL& url);
168
169  // Check if |str| matches any of the full-length hashes from the download
170  // whitelist.  Returns true if there was a match and false otherwise.
171  // To make sure we are conservative we will return true if an error occurs.
172  // This method is expected to be called on the IO thread.
173  virtual bool MatchDownloadWhitelistString(const std::string& str);
174
175  // Check if the CSD malware IP matching kill switch is turned on.
176  virtual bool IsMalwareKillSwitchOn();
177
178  // Check if the CSD whitelist kill switch is turned on.
179  virtual bool IsCsdWhitelistKillSwitchOn();
180
181  // Called on the IO thread to cancel a pending check if the result is no
182  // longer needed.
183  void CancelCheck(Client* client);
184
185  // Called on the IO thread when the SafeBrowsingProtocolManager has received
186  // the full hash results for prefix hits detected in the database.
187  void HandleGetHashResults(SafeBrowsingCheck* check,
188                            const std::vector<SBFullHashResult>& full_hashes,
189                            const base::TimeDelta& cache_lifetime);
190
191  // Log the user perceived delay caused by SafeBrowsing. This delay is the time
192  // delta starting from when we would have started reading data from the
193  // network, and ending when the SafeBrowsing check completes indicating that
194  // the current page is 'safe'.
195  void LogPauseDelay(base::TimeDelta time);
196
197  // Called to initialize objects that are used on the io_thread.  This may be
198  // called multiple times during the life of the DatabaseManager. Should be
199  // called on IO thread.
200  void StartOnIOThread();
201
202  // Called to stop or shutdown operations on the io_thread. This may be called
203  // multiple times during the life of the DatabaseManager. Should be called
204  // on IO thread. If shutdown is true, the manager is disabled permanently.
205  void StopOnIOThread(bool shutdown);
206
207 protected:
208  virtual ~SafeBrowsingDatabaseManager();
209
210  // protected for tests.
211  void NotifyDatabaseUpdateFinished(bool update_succeeded);
212
213 private:
  // base::RefCountedThreadSafe is a friend so that it can reach the
  // non-public destructor; the remaining friends are test classes.
214  friend class base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>;
215  friend class SafeBrowsingServerTest;
216  friend class SafeBrowsingServiceTest;
217  friend class SafeBrowsingServiceTestHelper;
218  friend class SafeBrowsingDatabaseManagerTest;
219  FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseManagerTest, GetUrlThreatType);
220
  // Container types for |checks_| and |gethash_requests_|.  GetHashRequests
  // maps a hash prefix to the checks registered for that prefix.
221  typedef std::set<SafeBrowsingCheck*> CurrentChecks;
222  typedef std::vector<SafeBrowsingCheck*> GetHashRequestors;
223  typedef base::hash_map<SBPrefix, GetHashRequestors> GetHashRequests;
224
225  // Clients that we've queued up for checking later once the database is ready.
226  struct QueuedCheck {
227    QueuedCheck(const safe_browsing_util::ListType check_type,
228                Client* client,
229                const GURL& url,
230                const std::vector<SBThreatType>& expected_threats,
231                const base::TimeTicks& start);
232    ~QueuedCheck();
233    safe_browsing_util::ListType check_type;
234    Client* client;
235    GURL url;
236    std::vector<SBThreatType> expected_threats;
237    base::TimeTicks start;  // When check was queued.
238  };
239
240  // Return the threat type from the first result in |full_hashes| which matches
241  // |hash|, or SAFE if none match.
242  static SBThreatType GetHashThreatType(
243      const SBFullHash& hash,
244      const std::vector<SBFullHashResult>& full_hashes);
245
246  // Given a URL, compare all the possible host + path full hashes to the set of
247  // provided full hashes.  Returns the threat type of the matching result from
248  // |full_hashes|, or SAFE if none match.
  // NOTE(review): |index| is undocumented; it looks like an out-param giving
  // the position of the matching entry in |full_hashes| -- confirm.
249  static SBThreatType GetUrlThreatType(
250      const GURL& url,
251      const std::vector<SBFullHashResult>& full_hashes,
252      size_t* index);
253
254  // Called to stop operations on the io_thread. This may be called multiple
255  // times during the life of the DatabaseManager. Should be called on IO
256  // thread.
257  void DoStopOnIOThread();
258
259  // Returns whether |database_| exists and is accessible.
260  bool DatabaseAvailable() const;
261
262  // Called on the IO thread.  If the database does not exist, queues up a call
263  // on the db thread to create it.  Returns whether the database is available.
264  //
265  // Note that this is only needed outside the db thread, since functions on the
266  // db thread can call GetDatabase() directly.
267  bool MakeDatabaseAvailable();
268
269  // Should only be called on db thread as SafeBrowsingDatabase is not
270  // threadsafe.
271  SafeBrowsingDatabase* GetDatabase();
272
273  // Called on the IO thread with the check result.
274  void OnCheckDone(SafeBrowsingCheck* info);
275
276  // Called on the database thread to retrieve chunks.
277  void GetAllChunksFromDatabase(GetChunksCallback callback);
278
279  // Called on the IO thread with the results of all chunks.
280  void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists,
281                                  bool database_error,
282                                  GetChunksCallback callback);
283
284  // Called on the IO thread after the database reports that it added a chunk.
285  void OnAddChunksComplete(AddChunksCallback callback);
286
287  // Notification that the database is done loading its bloom filter.  We may
288  // have had to queue checks until the database is ready, and if so, this
289  // checks them.
290  void DatabaseLoadComplete();
291
292  // Called on the database thread to add/remove chunks and host keys.
293  void AddDatabaseChunks(const std::string& list,
294                         scoped_ptr<ScopedVector<SBChunkData> > chunks,
295                         AddChunksCallback callback);
296
297  void DeleteDatabaseChunks(
298      scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes);
299
300  void NotifyClientBlockingComplete(Client* client, bool proceed);
301
302  void DatabaseUpdateFinished(bool update_succeeded);
303
304  // Called on the db thread to close the database.  See CloseDatabase().
  // NOTE(review): CloseDatabase() is not declared in this header; the
  // reference above may be stale.
305  void OnCloseDatabase();
306
307  // Runs on the db thread to reset the database. We assume that resetting the
308  // database is a synchronous operation.
309  void OnResetDatabase();
310
311  // Internal worker function for processing full hashes.
312  void OnHandleGetHashResults(SafeBrowsingCheck* check,
313                              const std::vector<SBFullHashResult>& full_hashes);
314
315  // Run one check against |full_hashes|.  Returns |true| if the check
316  // finds a match in |full_hashes|.
317  bool HandleOneCheck(SafeBrowsingCheck* check,
318                      const std::vector<SBFullHashResult>& full_hashes);
319
320  // Invoked by CheckDownloadUrl. It checks the download URL on
321  // safe_browsing_thread_.
322  void CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check);
323
324  // The callback function when a safebrowsing check is timed out. Client will
325  // be notified that the safebrowsing check is SAFE when this happens.
326  void TimeoutCallback(SafeBrowsingCheck* check);
327
328  // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
329  void CheckDownloadUrlDone(SafeBrowsingCheck* check);
330
331  // Checks all extension ID hashes on safe_browsing_thread_.
332  void CheckExtensionIDsOnSBThread(SafeBrowsingCheck* check);
333
334  // Helper function that calls safe browsing client and cleans up |checks_|.
335  void SafeBrowsingCheckDone(SafeBrowsingCheck* check);
336
337  // Helper function to set |check| with default values and start a safe
338  // browsing check with a timeout. |task| will be called on
339  // success, otherwise TimeoutCallback will be called.
  // NOTE(review): there is no |timeout| parameter; presumably the timeout is
  // |check_timeout_| -- confirm.
340  void StartSafeBrowsingCheck(SafeBrowsingCheck* check,
341                              const base::Closure& task);
342
343  // SafeBrowsingProtocolManagerDelegate overrides.
344  virtual void ResetDatabase() OVERRIDE;
345  virtual void UpdateStarted() OVERRIDE;
346  virtual void UpdateFinished(bool success) OVERRIDE;
347  virtual void GetChunks(GetChunksCallback callback) OVERRIDE;
348  virtual void AddChunks(const std::string& list,
349                         scoped_ptr<ScopedVector<SBChunkData> > chunks,
350                         AddChunksCallback callback) OVERRIDE;
351  virtual void DeleteChunks(
352      scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes) OVERRIDE;
353
354  scoped_refptr<SafeBrowsingService> sb_service_;
355
  // The set of current checks; SafeBrowsingCheckDone() cleans entries up.
356  CurrentChecks checks_;
357
358  // Used for issuing only one GetHash request for a given prefix.
359  GetHashRequests gethash_requests_;
360
361  // The persistent database.  We don't use a scoped_ptr because it
362  // needs to be destroyed on a different thread than this object.
363  SafeBrowsingDatabase* database_;
364
365  // Lock used to prevent possible data races due to compiler optimizations.
366  mutable base::Lock database_lock_;
367
368  // Whether the service is running. 'enabled_' is used by the
369  // SafeBrowsingDatabaseManager on the IO thread during normal operations.
370  bool enabled_;
371
372  // Indicate if download_protection is enabled by command switch
373  // so we allow this feature to be exercised.
374  bool enable_download_protection_;
375
376  // Indicate if client-side phishing detection whitelist should be enabled
377  // or not.
378  bool enable_csd_whitelist_;
379
380  // Indicate if the download whitelist should be enabled or not.
381  bool enable_download_whitelist_;
382
383  // Indicate if the extension blacklist should be enabled.
384  bool enable_extension_blacklist_;
385
386  // Indicate if the side effect free whitelist should be enabled.
387  bool enable_side_effect_free_whitelist_;
388
389  // Indicate if the csd malware IP blacklist should be enabled.
390  bool enable_ip_blacklist_;
391
392  // The SafeBrowsing thread that runs database operations.
393  //
394  // Note: Functions that run on this thread should run synchronously and return
395  // to the IO thread, not post additional tasks back to this thread, lest we
396  // cause a race condition at shutdown time that leads to a database leak.
397  scoped_ptr<base::Thread> safe_browsing_thread_;
398
399  // Indicates if we're currently in an update cycle.
400  bool update_in_progress_;
401
402  // When true, newly fetched chunks may not be in the database yet since the
403  // database is still updating.
404  bool database_update_in_progress_;
405
406  // Indicates if we're in the midst of trying to close the database.  If this
407  // is true, nothing on the IO thread should access the database.
408  bool closing_database_;
409
  // Checks deferred until the database is ready (see QueuedCheck).
410  std::deque<QueuedCheck> queued_checks_;
411
412  // Timeout to use for safe browsing checks.
413  base::TimeDelta check_timeout_;
414
415  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager);
416};
417
418#endif  // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
419