1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
6#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
7#pragma once
8
#include <set>
#include <string>
#include <vector>

#include "base/file_path.h"
#include "base/memory/scoped_ptr.h"
#include "base/synchronization/lock.h"
#include "base/task.h"
#include "chrome/browser/safe_browsing/safe_browsing_store.h"
#include "testing/gtest/include/gtest/gtest_prod.h"
18
// Forward declarations for types that this header refers to without needing
// their full definitions.
namespace base {
class Time;
}  // namespace base

namespace safe_browsing {
class PrefixSet;
}  // namespace safe_browsing

class BloomFilter;
class GURL;
class MessageLoop;
class SafeBrowsingDatabase;
31
32// Factory for creating SafeBrowsingDatabase. Tests implement this factory
33// to create fake Databases for testing.
34class SafeBrowsingDatabaseFactory {
35 public:
36  SafeBrowsingDatabaseFactory() { }
37  virtual ~SafeBrowsingDatabaseFactory() { }
38  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
39      bool enable_download_protection,
40      bool enable_client_side_whitelist) = 0;
41 private:
42  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
43};
44
45
46// Encapsulates on-disk databases that for safebrowsing. There are
47// three databases: browse, download and client-side detection (csd)
48// whitelist databases. The browse database contains information
49// about phishing and malware urls. The download database contains
50// URLs for bad binaries (e.g: those containing virus) and hash of
51// these downloaded contents. The csd whitelist database contains URLs
52// that will never be considered as phishing by the client-side
53// phishing detection. These on-disk databases are shared among all
54// profiles, as it doesn't contain user-specific data. This object is
55// not thread-safe, i.e. all its methods should be used on the same
56// thread that it was created on.
57class SafeBrowsingDatabase {
58 public:
59  // Factory method for obtaining a SafeBrowsingDatabase implementation.
60  // It is not thread safe.
61  // |enable_download_protection| is used to control the download database
62  // feature.
63  // |enable_client_side_whitelist| is used to control the csd whitelist
64  // database feature.
65  static SafeBrowsingDatabase* Create(bool enable_download_protection,
66                                      bool enable_client_side_whitelist);
67
68  // Makes the passed |factory| the factory used to instantiate
69  // a SafeBrowsingDatabase. This is used for tests.
70  static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
71    factory_ = factory;
72  }
73
74  virtual ~SafeBrowsingDatabase();
75
76  // Initializes the database with the given filename.
77  virtual void Init(const FilePath& filename) = 0;
78
79  // Deletes the current database and creates a new one.
80  virtual bool ResetDatabase() = 0;
81
82  // Returns false if |url| is not in the browse database.  If it
83  // returns true, then either |matching_list| is the name of the matching
84  // list, or |prefix_hits| and |full_hits| contains the matching hash
85  // prefixes.  This function is safe to call from threads other than
86  // the creation thread.
87  virtual bool ContainsBrowseUrl(const GURL& url,
88                                 std::string* matching_list,
89                                 std::vector<SBPrefix>* prefix_hits,
90                                 std::vector<SBFullHashResult>* full_hits,
91                                 base::Time last_update) = 0;
92
93  // Returns false if none of |urls| are in Download database. If it returns
94  // true, |prefix_hits| should contain the prefixes for the URLs that were in
95  // the database.  This function could ONLY be accessed from creation thread.
96  virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
97                                   std::vector<SBPrefix>* prefix_hits) = 0;
98
99  // Returns false if |prefix| is not in Download database.
100  // This function could ONLY be accessed from creation thread.
101  virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0;
102
103  // Returns false if |url| is not on the client-side phishing detection
104  // whitelist.  Otherwise, this function returns true.  Note: the whitelist
105  // only contains full-length hashes so we don't return any prefix hit.
106  // This function should only be called from the IO thread.
107  virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
108
109  // A database transaction should look like:
110  //
111  // std::vector<SBListChunkRanges> lists;
112  // if (db.UpdateStarted(&lists)) {
113  //   // Do something with |lists|.
114  //
115  //   // Process add/sub commands.
116  //   db.InsertChunks(list_name, chunks);
117  //
118  //   // Process adddel/subdel commands.
119  //   db.DeleteChunks(chunks_deletes);
120  //
121  //   // If passed true, processes the collected chunk info and
122  //   // rebuilds the bloom filter.  If passed false, rolls everything
123  //   // back.
124  //   db.UpdateFinished(success);
125  // }
126  //
127  // If UpdateStarted() returns true, the caller MUST eventually call
128  // UpdateFinished().  If it returns false, the caller MUST NOT call
129  // the other functions.
130  virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
131  virtual void InsertChunks(const std::string& list_name,
132                            const SBChunkList& chunks) = 0;
133  virtual void DeleteChunks(
134      const std::vector<SBChunkDelete>& chunk_deletes) = 0;
135  virtual void UpdateFinished(bool update_succeeded) = 0;
136
137  // Store the results of a GetHash response. In the case of empty results, we
138  // cache the prefixes until the next update so that we don't have to issue
139  // further GetHash requests we know will be empty.
140  virtual void CacheHashResults(
141      const std::vector<SBPrefix>& prefixes,
142      const std::vector<SBFullHashResult>& full_hits) = 0;
143
144  // The name of the bloom-filter file for the given database file.
145  static FilePath BloomFilterForFilename(const FilePath& db_filename);
146
147  // Filename for malware and phishing URL database.
148  static FilePath BrowseDBFilename(const FilePath& db_base_filename);
149
150  // Filename for download URL and download binary hash database.
151  static FilePath DownloadDBFilename(const FilePath& db_base_filename);
152
153  // Filename for client-side phishing detection whitelist databsae.
154  static FilePath CsdWhitelistDBFilename(
155      const FilePath& csd_whitelist_base_filename);
156
157  // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
158  // ORDERING OF THESE VALUES.
159  enum FailureType {
160    FAILURE_DATABASE_CORRUPT,
161    FAILURE_DATABASE_CORRUPT_HANDLER,
162    FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
163    FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
164    FAILURE_DATABASE_FILTER_MISSING,
165    FAILURE_DATABASE_FILTER_READ,
166    FAILURE_DATABASE_FILTER_WRITE,
167    FAILURE_DATABASE_FILTER_DELETE,
168    FAILURE_DATABASE_STORE_MISSING,
169    FAILURE_DATABASE_STORE_DELETE,
170    FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
171    FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
172    FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN,
173    FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH,
174
175    // Memory space for histograms is determined by the max.  ALWAYS
176    // ADD NEW VALUES BEFORE THIS ONE.
177    FAILURE_DATABASE_MAX
178  };
179
180  static void RecordFailure(FailureType failure_type);
181
182 private:
183  // The factory used to instantiate a SafeBrowsingDatabase object.
184  // Useful for tests, so they can provide their own implementation of
185  // SafeBrowsingDatabase.
186  static SafeBrowsingDatabaseFactory* factory_;
187};
188
189class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
190 public:
191  // Create a database with a browse store, download store and
192  // csd_whitelist_store. Takes ownership of browse_store, download_store and
193  // csd_whitelist_store. When |download_store| is NULL, the database
194  // will ignore any operations related download (url hashes and
195  // binary hashes).  Same for the |csd_whitelist_store|.
196  SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
197                          SafeBrowsingStore* download_store,
198                          SafeBrowsingStore* csd_whitelist_store);
199
200  // Create a database with a browse store. This is a legacy interface that
201  // useds Sqlite.
202  SafeBrowsingDatabaseNew();
203
204  virtual ~SafeBrowsingDatabaseNew();
205
206  // Implement SafeBrowsingDatabase interface.
207  virtual void Init(const FilePath& filename);
208  virtual bool ResetDatabase();
209  virtual bool ContainsBrowseUrl(const GURL& url,
210                                 std::string* matching_list,
211                                 std::vector<SBPrefix>* prefix_hits,
212                                 std::vector<SBFullHashResult>* full_hits,
213                                 base::Time last_update);
214  virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
215                                   std::vector<SBPrefix>* prefix_hits);
216  virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix);
217  virtual bool ContainsCsdWhitelistedUrl(const GURL& url);
218  virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists);
219  virtual void InsertChunks(const std::string& list_name,
220                            const SBChunkList& chunks);
221  virtual void DeleteChunks(const std::vector<SBChunkDelete>& chunk_deletes);
222  virtual void UpdateFinished(bool update_succeeded);
223  virtual void CacheHashResults(const std::vector<SBPrefix>& prefixes,
224                                const std::vector<SBFullHashResult>& full_hits);
225
226 private:
227  friend class SafeBrowsingDatabaseTest;
228  FRIEND_TEST(SafeBrowsingDatabaseTest, HashCaching);
229
230  // Return the browse_store_, download_store_ or csd_whitelist_store_
231  // based on list_id.
232  SafeBrowsingStore* GetStore(int list_id);
233
234    // Deletes the files on disk.
235  bool Delete();
236
237  // Load the bloom filter off disk, or generates one if it doesn't exist.
238  void LoadBloomFilter();
239
240  // Writes the current bloom filter to disk.
241  void WriteBloomFilter();
242
243  // Loads the given full-length hashes to the csd whitelist.  If the number
244  // of hashes is too large or if the kill switch URL is on the whitelist
245  // we will whitelist all URLs.
246  void LoadCsdWhitelist(const std::vector<SBAddFullHash>& full_hashes);
247
248  // Call this method if an error occured with the csd whitelist.  This will
249  // result in all calls to ContainsCsdWhitelistedUrl() to returning true.
250  void CsdWhitelistAllUrls();
251
252  // Helpers for handling database corruption.
253  // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
254  // |corruption_detected_|, |HandleCorruptDatabase()| posts
255  // |OnHandleCorruptDatabase()| to the current thread, to be run
256  // after the current task completes.
257  // TODO(shess): Wire things up to entirely abort the update
258  // transaction when this happens.
259  void HandleCorruptDatabase();
260  void OnHandleCorruptDatabase();
261
262  // Helpers for InsertChunks().
263  void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
264  void InsertAddChunks(int list_id, const SBChunkList& chunks);
265  void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
266  void InsertSubChunks(int list_id, const SBChunkList& chunks);
267
268  void UpdateDownloadStore();
269  void UpdateBrowseStore();
270  void UpdateCsdWhitelistStore();
271
272  // Helper function to compare addprefixes in download_store_ with |prefixes|.
273  // The |list_bit| indicates which list (download url or download hash)
274  // to compare.
275  // Returns true if there is a match, |*prefix_hits| will contain the actual
276  // matching prefixes.
277  bool MatchDownloadAddPrefixes(int list_bit,
278                                const std::vector<SBPrefix>& prefixes,
279                                std::vector<SBPrefix>* prefix_hits);
280
281  // Used to verify that various calls are made from the thread the
282  // object was created on.
283  MessageLoop* creation_loop_;
284
285  // Lock for protecting access to variables that may be used on the
286  // IO thread.  This includes |browse_bloom_filter_|, |full_browse_hashes_|,
287  // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|, and
288  // |csd_whitelist_all_urls_|.
289  base::Lock lookup_lock_;
290
291  // Underlying persistent store for chunk data.
292  // For browsing related (phishing and malware URLs) chunks and prefixes.
293  FilePath browse_filename_;
294  scoped_ptr<SafeBrowsingStore> browse_store_;
295
296  // For download related (download URL and binary hash) chunks and prefixes.
297  FilePath download_filename_;
298  scoped_ptr<SafeBrowsingStore> download_store_;
299
300  // For the client-side phishing detection whitelist chunks and full-length
301  // hashes.  This list only contains 256 bit hashes.
302  FilePath csd_whitelist_filename_;
303  scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
304
305  // All the client-side phishing detection whitelist entries are loaded in
306  // a sorted vector.
307  std::vector<SBFullHash> csd_whitelist_;
308
309  // If true, ContainsCsdWhitelistedUrl will always return true for all URLs.
310  // This is set to true if the csd whitelist is too large to be stored in
311  // memory, if the kill switch URL is on the csd whitelist or if there was
312  // an error during the most recent update.
313  bool csd_whitelist_all_urls_;
314
315  // Bloom filter generated from the add-prefixes in |browse_store_|.
316  // Only browse_store_ requires the BloomFilter for fast query.
317  FilePath bloom_filter_filename_;
318  scoped_refptr<BloomFilter> browse_bloom_filter_;
319
320  // Cached browse store related full-hash items, ordered by prefix for
321  // efficient scanning.
322  // |full_browse_hashes_| are items from |browse_store_|,
323  // |pending_browse_hashes_| are items from |CacheHashResults()|, which
324  // will be pushed to the store on the next update.
325  std::vector<SBAddFullHash> full_browse_hashes_;
326  std::vector<SBAddFullHash> pending_browse_hashes_;
327
328  // Cache of prefixes that returned empty results (no full hash
329  // match) to |CacheHashResults()|.  Cached to prevent asking for
330  // them every time.  Cleared on next update.
331  std::set<SBPrefix> prefix_miss_cache_;
332
333  // Used to schedule resetting the database because of corruption.
334  ScopedRunnableMethodFactory<SafeBrowsingDatabaseNew> reset_factory_;
335
336  // Set if corruption is detected during the course of an update.
337  // Causes the update functions to fail with no side effects, until
338  // the next call to |UpdateStarted()|.
339  bool corruption_detected_;
340
341  // Set to true if any chunks are added or deleted during an update.
342  // Used to optimize away database update.
343  bool change_detected_;
344
345  // Used to check if a prefix was in the database.
346  scoped_ptr<safe_browsing::PrefixSet> prefix_set_;
347};
348
349#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
350