1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7#include <algorithm>
8#include <iterator>
9
10#include "base/file_util.h"
11#include "base/metrics/histogram.h"
12#include "base/metrics/stats_counters.h"
13#include "base/time.h"
14#include "base/message_loop.h"
15#include "base/process_util.h"
16#include "crypto/sha2.h"
17#include "chrome/browser/safe_browsing/bloom_filter.h"
18#include "chrome/browser/safe_browsing/prefix_set.h"
19#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
20#include "content/browser/browser_thread.h"
21#include "googleurl/src/gurl.h"
22
23namespace {
24
25// Filename suffix for the bloom filter.
26const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
27// Filename suffix for download store.
28const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
29// Filename suffix for client-side phishing detection whitelist store.
30const FilePath::CharType kCsdWhitelistDBFile[] =
31    FILE_PATH_LITERAL(" Csd Whitelist");
32// Filename suffix for browse store.
33// TODO(lzheng): change to a better name when we change the file format.
34const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
35
36// The maximum staleness for a cached entry.
37const int kMaxStalenessMinutes = 45;
38
39// Maximum number of entries we allow in the client-side phishing detection
40// whitelist.  If the whitelist on disk contains more entries then
41// ContainsCsdWhitelistedUrl will always return true.
42const size_t kMaxCsdWhitelistSize = 5000;
43
44// If the hash of this exact expression is on the csd whitelist then
45// ContainsCsdWhitelistedUrl will always return true.
46const char kCsdKillSwitchUrl[] =
47    "sb-ssl.google.com/safebrowsing/csd/killswitch";
48
49// To save space, the incoming |chunk_id| and |list_id| are combined
50// into an |encoded_chunk_id| for storage by shifting the |list_id|
51// into the low-order bits.  These functions decode that information.
52// TODO(lzheng): It was reasonable when database is saved in sqlite, but
53// there should be better ways to save chunk_id and list_id after we use
54// SafeBrowsingStoreFile.
55int GetListIdBit(const int encoded_chunk_id) {
56  return encoded_chunk_id & 1;
57}
58int DecodeChunkId(int encoded_chunk_id) {
59  return encoded_chunk_id >> 1;
60}
61int EncodeChunkId(const int chunk, const int list_id) {
62  DCHECK_NE(list_id, safe_browsing_util::INVALID);
63  return chunk << 1 | list_id % 2;
64}
65
66// Generate the set of full hashes to check for |url|.  If
67// |include_whitelist_hashes| is true we will generate additional path-prefixes
68// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
69// whitelist it should also match /foo/bar which is not the case for all the
70// other lists.
71// TODO(shess): This function is almost the same as
72// |CompareFullHashes()| in safe_browsing_util.cc, except that code
73// does an early exit on match.  Since match should be the infrequent
74// case (phishing or malware found), consider combining this function
75// with that one.
76void BrowseFullHashesToCheck(const GURL& url,
77                             bool include_whitelist_hashes,
78                             std::vector<SBFullHash>* full_hashes) {
79  std::vector<std::string> hosts;
80  if (url.HostIsIPAddress()) {
81    hosts.push_back(url.host());
82  } else {
83    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
84  }
85
86  std::vector<std::string> paths;
87  safe_browsing_util::GeneratePathsToCheck(url, &paths);
88
89  for (size_t i = 0; i < hosts.size(); ++i) {
90    for (size_t j = 0; j < paths.size(); ++j) {
91      const std::string& path = paths[j];
92      SBFullHash full_hash;
93      crypto::SHA256HashString(hosts[i] + path, &full_hash,
94                               sizeof(full_hash));
95      full_hashes->push_back(full_hash);
96
97      // We may have /foo as path-prefix in the whitelist which should
98      // also match with /foo/bar and /foo?bar.  Hence, for every path
99      // that ends in '/' we also add the path without the slash.
100      if (include_whitelist_hashes &&
101          path.size() > 1 &&
102          path[path.size() - 1] == '/') {
103        crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
104                                 &full_hash, sizeof(full_hash));
105        full_hashes->push_back(full_hash);
106      }
107    }
108  }
109}
110
111// Get the prefixes matching the download |urls|.
112void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
113                            std::vector<SBPrefix>* prefixes) {
114  std::vector<SBFullHash> full_hashes;
115  for (size_t i = 0; i < urls.size(); ++i)
116    BrowseFullHashesToCheck(urls[i], false, &full_hashes);
117
118  for (size_t i = 0; i < full_hashes.size(); ++i)
119    prefixes->push_back(full_hashes[i].prefix);
120}
121
122// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
123// add them to |full_hits| if not expired.  "Not expired" is when
124// either |last_update| was recent enough, or the item has been
125// received recently enough.  Expired items are not deleted because a
126// future update may make them acceptable again.
127//
128// For efficiency reasons the code walks |prefix_hits| and
129// |full_hashes| in parallel, so they must be sorted by prefix.
130void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
131                                  const std::vector<SBAddFullHash>& full_hashes,
132                                  std::vector<SBFullHashResult>* full_hits,
133                                  base::Time last_update) {
134  const base::Time expire_time =
135      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
136
137  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
138  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
139
140  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
141    if (*piter < hiter->full_hash.prefix) {
142      ++piter;
143    } else if (hiter->full_hash.prefix < *piter) {
144      ++hiter;
145    } else {
146      if (expire_time < last_update ||
147          expire_time.ToTimeT() < hiter->received) {
148        SBFullHashResult result;
149        const int list_bit = GetListIdBit(hiter->chunk_id);
150        DCHECK(list_bit == safe_browsing_util::MALWARE ||
151               list_bit == safe_browsing_util::PHISH);
152        if (!safe_browsing_util::GetListName(list_bit, &result.list_name))
153          continue;
154        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
155        result.hash = hiter->full_hash;
156        full_hits->push_back(result);
157      }
158
159      // Only increment |hiter|, |piter| might have multiple hits.
160      ++hiter;
161    }
162  }
163}
164
165// This function generates a chunk range string for |chunks|. It
166// outputs one chunk range string per list and writes it to the
167// |list_ranges| vector.  We expect |list_ranges| to already be of the
168// right size.  E.g., if |chunks| contains chunks with two different
169// list ids then |list_ranges| must contain two elements.
170void GetChunkRanges(const std::vector<int>& chunks,
171                    std::vector<std::string>* list_ranges) {
172  DCHECK_GT(list_ranges->size(), 0U);
173  DCHECK_LE(list_ranges->size(), 2U);
174  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
175  for (std::vector<int>::const_iterator iter = chunks.begin();
176       iter != chunks.end(); ++iter) {
177    int mod_list_id = GetListIdBit(*iter);
178    DCHECK_GE(mod_list_id, 0);
179    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
180    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
181  }
182  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
183    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
184  }
185}
186
187// Helper function to create chunk range lists for Browse related
188// lists.
189void UpdateChunkRanges(SafeBrowsingStore* store,
190                       const std::vector<std::string>& listnames,
191                       std::vector<SBListChunkRanges>* lists) {
192  DCHECK_GT(listnames.size(), 0U);
193  DCHECK_LE(listnames.size(), 2U);
194  std::vector<int> add_chunks;
195  std::vector<int> sub_chunks;
196  store->GetAddChunks(&add_chunks);
197  store->GetSubChunks(&sub_chunks);
198
199  std::vector<std::string> adds(listnames.size());
200  std::vector<std::string> subs(listnames.size());
201  GetChunkRanges(add_chunks, &adds);
202  GetChunkRanges(sub_chunks, &subs);
203
204  for (size_t i = 0; i < listnames.size(); ++i) {
205    const std::string& listname = listnames[i];
206    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
207              static_cast<int>(i % 2));
208    DCHECK_NE(safe_browsing_util::GetListId(listname),
209              safe_browsing_util::INVALID);
210    lists->push_back(SBListChunkRanges(listname));
211    lists->back().adds.swap(adds[i]);
212    lists->back().subs.swap(subs[i]);
213  }
214}
215
216// Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
217// safe_browsing_store.h orders on both chunk-id and prefix.
218bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
219  return a.full_hash.prefix < b.full_hash.prefix;
220}
221
222// As compared to the bloom filter, PrefixSet should have these
223// properties:
224// - Any bloom filter miss should be a prefix set miss.
225// - Any prefix set hit should be a bloom filter hit.
226// - Bloom filter false positives are prefix set misses.
227// The following is to log actual performance to verify this.
228enum PrefixSetEvent {
229  PREFIX_SET_EVENT_HIT,
230  PREFIX_SET_EVENT_BLOOM_HIT,
231  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
232  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
233  PREFIX_SET_GETPREFIXES_BROKEN,
234  PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
235  PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
236  PREFIX_SET_SBPREFIX_WAS_BROKEN,
237  PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
238  PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
239  PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
240  PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
241  PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,
242
243  // Memory space for histograms is determined by the max.  ALWAYS ADD
244  // NEW VALUES BEFORE THIS ONE.
245  PREFIX_SET_EVENT_MAX
246};
247
248void RecordPrefixSetInfo(PrefixSetEvent event_type) {
249  UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
250                            PREFIX_SET_EVENT_MAX);
251}
252
253// Generate a |PrefixSet| instance from the contents of
254// |add_prefixes|.  Additionally performs various checks to make sure
255// that the resulting prefix set is valid, so that the
256// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
257// ContainsBrowseUrl() can be trustworthy.
258safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
259    const std::vector<SBAddPrefix>& add_prefixes) {
260  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
261  // could be passed directly to |PrefixSet()|, removing the need for
262  // |prefixes|.  For now, |prefixes| is useful while debugging
263  // things.
264  std::vector<SBPrefix> prefixes;
265  for (size_t i = 0; i < add_prefixes.size(); ++i) {
266    prefixes.push_back(add_prefixes[i].prefix);
267  }
268
269  std::sort(prefixes.begin(), prefixes.end());
270  prefixes.erase(std::unique(prefixes.begin(), prefixes.end()),
271                 prefixes.end());
272
273  scoped_ptr<safe_browsing::PrefixSet>
274      prefix_set(new safe_browsing::PrefixSet(prefixes));
275
276  std::vector<SBPrefix> restored;
277  prefix_set->GetPrefixes(&restored);
278
279  // Expect them to be equal.
280  if (restored.size() == prefixes.size() &&
281      std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
282    return prefix_set.release();
283
284  // Log BROKEN for continuity with previous release, and SIZE to
285  // distinguish which test failed.
286  NOTREACHED();
287  RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
288  if (restored.size() != prefixes.size())
289    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);
290
291  // Try to distinguish between updates from one broken user and a
292  // distributed problem.
293  static bool logged_broken = false;
294  if (!logged_broken) {
295    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
296    logged_broken = true;
297  }
298
299  // This seems so very very unlikely.  But if it ever were true, then
300  // it could explain why GetPrefixes() seemed broken.
301  if (sizeof(int) != sizeof(int32))
302    RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);
303
304  // Check if memory was corrupted during construction.
305  if (!prefix_set->CheckChecksum())
306    RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);
307
308  // Check whether |restored| is unsorted, or has duplication.
309  if (restored.size()) {
310    size_t unsorted_count = 0;
311    bool duplicates = false;
312    SBPrefix prev = restored[0];
313    for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
314      if (prev > restored[i]) {
315        unsorted_count++;
316        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
317                             prev - restored[i]);
318
319        // When unsorted, how big is the set, and how far are we into
320        // it.  If the set is very small or large, that might inform
321        // pursuit of a degenerate case.  If the percentage is close
322        // to 0%, 100%, or 50%, then there might be an interesting
323        // degenerate case to explore.
324        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
325        UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
326                                 i * 100 / restored.size());
327
328        if (prefix_set->IsDeltaAt(i)) {
329          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);
330
331          // Histograms require memory on the order of the number of
332          // buckets, making high-precision logging expensive.  For
333          // now aim for a sense of the range of the problem.
334          UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
335                                      prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
336        } else {
337          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
338        }
339      }
340      if (prev == restored[i])
341        duplicates = true;
342    }
343
344    // Record findings.
345    if (unsorted_count) {
346      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
347      UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
348    }
349    if (duplicates)
350      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);
351
352    // Fix the problems noted.  If |restored| was unsorted, then
353    // |duplicates| may give a false negative.
354    if (unsorted_count)
355      std::sort(restored.begin(), restored.end());
356    if (unsorted_count || duplicates)
357      restored.erase(std::unique(restored.begin(), restored.end()),
358                     restored.end());
359  }
360
361  // NOTE(shess): The following could be done using a single
362  // uber-loop, but it's complicated by needing multiple parallel
363  // iterators.  Didn't seem worthwhile for something that will only
364  // live for a short period and only fires for one in a million
365  // updates.
366
367  // Find elements in |restored| which are not in |prefixes|.
368  std::vector<SBPrefix> difference;
369  std::set_difference(restored.begin(), restored.end(),
370                      prefixes.begin(), prefixes.end(),
371                      std::back_inserter(difference));
372  if (difference.size())
373    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());
374
375  // Find elements in |prefixes| which are not in |restored|.
376  difference.clear();
377  std::set_difference(prefixes.begin(), prefixes.end(),
378                      restored.begin(), restored.end(),
379                      std::back_inserter(difference));
380  if (difference.size())
381    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
382                             difference.size());
383
384  return prefix_set.release();
385}
386
387}  // namespace
388
389// The default SafeBrowsingDatabaseFactory.
390class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
391 public:
392  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
393      bool enable_download_protection,
394      bool enable_client_side_whitelist) {
395    return new SafeBrowsingDatabaseNew(
396        new SafeBrowsingStoreFile,
397        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
398        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL);
399  }
400
401  SafeBrowsingDatabaseFactoryImpl() { }
402
403 private:
404  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
405};
406
407// static
408SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
409
410// Factory method, non-thread safe. Caller has to make sure this s called
411// on SafeBrowsing Thread.
412// TODO(shess): There's no need for a factory any longer.  Convert
413// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
414// callers just construct things directly.
415SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
416    bool enable_download_protection,
417    bool enable_client_side_whitelist) {
418  if (!factory_)
419    factory_ = new SafeBrowsingDatabaseFactoryImpl();
420  return factory_->CreateSafeBrowsingDatabase(enable_download_protection,
421                                              enable_client_side_whitelist);
422}
423
424SafeBrowsingDatabase::~SafeBrowsingDatabase() {
425}
426
427// static
428FilePath SafeBrowsingDatabase::BrowseDBFilename(
429         const FilePath& db_base_filename) {
430  return FilePath(db_base_filename.value() + kBrowseDBFile);
431}
432
433// static
434FilePath SafeBrowsingDatabase::DownloadDBFilename(
435    const FilePath& db_base_filename) {
436  return FilePath(db_base_filename.value() + kDownloadDBFile);
437}
438
439// static
440FilePath SafeBrowsingDatabase::BloomFilterForFilename(
441    const FilePath& db_filename) {
442  return FilePath(db_filename.value() + kBloomFilterFile);
443}
444
445// static
446FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
447    const FilePath& db_filename) {
448  return FilePath(db_filename.value() + kCsdWhitelistDBFile);
449}
450
451SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
452  DVLOG(3) << "Get store for list: " << list_id;
453  if (list_id == safe_browsing_util::PHISH ||
454      list_id == safe_browsing_util::MALWARE) {
455    return browse_store_.get();
456  } else if (list_id == safe_browsing_util::BINURL ||
457             list_id == safe_browsing_util::BINHASH) {
458    return download_store_.get();
459  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
460    return csd_whitelist_store_.get();
461  }
462  return NULL;
463}
464
465// static
466void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
467  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
468                            FAILURE_DATABASE_MAX);
469}
470
471SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
472    : creation_loop_(MessageLoop::current()),
473      browse_store_(new SafeBrowsingStoreFile),
474      download_store_(NULL),
475      csd_whitelist_store_(NULL),
476      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) {
477  DCHECK(browse_store_.get());
478  DCHECK(!download_store_.get());
479  DCHECK(!csd_whitelist_store_.get());
480}
481
482SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
483    SafeBrowsingStore* browse_store,
484    SafeBrowsingStore* download_store,
485    SafeBrowsingStore* csd_whitelist_store)
486    : creation_loop_(MessageLoop::current()),
487      browse_store_(browse_store),
488      download_store_(download_store),
489      csd_whitelist_store_(csd_whitelist_store),
490      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
491      corruption_detected_(false) {
492  DCHECK(browse_store_.get());
493}
494
495SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
496  DCHECK_EQ(creation_loop_, MessageLoop::current());
497}
498
499void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
500  DCHECK_EQ(creation_loop_, MessageLoop::current());
501  // Ensure we haven't been run before.
502  DCHECK(browse_filename_.empty());
503  DCHECK(download_filename_.empty());
504  DCHECK(csd_whitelist_filename_.empty());
505
506  browse_filename_ = BrowseDBFilename(filename_base);
507  bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);
508
509  browse_store_->Init(
510      browse_filename_,
511      NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
512  DVLOG(1) << "Init browse store: " << browse_filename_.value();
513
514  {
515    // NOTE: There is no need to grab the lock in this function, since
516    // until it returns, there are no pointers to this class on other
517    // threads.  Then again, that means there is no possibility of
518    // contention on the lock...
519    base::AutoLock locked(lookup_lock_);
520    full_browse_hashes_.clear();
521    pending_browse_hashes_.clear();
522    LoadBloomFilter();
523  }
524
525  if (download_store_.get()) {
526    download_filename_ = DownloadDBFilename(filename_base);
527    download_store_->Init(
528        download_filename_,
529        NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
530    DVLOG(1) << "Init download store: " << download_filename_.value();
531  }
532
533  if (csd_whitelist_store_.get()) {
534    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
535    csd_whitelist_store_->Init(
536        csd_whitelist_filename_,
537        NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
538    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
539    std::vector<SBAddFullHash> full_hashes;
540    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
541      LoadCsdWhitelist(full_hashes);
542    } else {
543      CsdWhitelistAllUrls();
544    }
545  } else {
546    CsdWhitelistAllUrls();  // Just to be safe.
547  }
548}
549
550bool SafeBrowsingDatabaseNew::ResetDatabase() {
551  DCHECK_EQ(creation_loop_, MessageLoop::current());
552
553  // Delete files on disk.
554  // TODO(shess): Hard to see where one might want to delete without a
555  // reset.  Perhaps inline |Delete()|?
556  if (!Delete())
557    return false;
558
559  // Reset objects in memory.
560  {
561    base::AutoLock locked(lookup_lock_);
562    full_browse_hashes_.clear();
563    pending_browse_hashes_.clear();
564    prefix_miss_cache_.clear();
565    // TODO(shess): This could probably be |bloom_filter_.reset()|.
566    browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
567                                           BloomFilter::kBloomFilterSizeRatio);
568    // TODO(shess): It is simpler for the code to assume that presence
569    // of a bloom filter always implies presence of a prefix set.
570    prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
571  }
572  // Wants to acquire the lock itself.
573  CsdWhitelistAllUrls();
574
575  return true;
576}
577
578// TODO(lzheng): Remove matching_list, it is not used anywhere.
579bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
580    const GURL& url,
581    std::string* matching_list,
582    std::vector<SBPrefix>* prefix_hits,
583    std::vector<SBFullHashResult>* full_hits,
584    base::Time last_update) {
585  // Clear the results first.
586  matching_list->clear();
587  prefix_hits->clear();
588  full_hits->clear();
589
590  std::vector<SBFullHash> full_hashes;
591  BrowseFullHashesToCheck(url, false, &full_hashes);
592  if (full_hashes.empty())
593    return false;
594
595  // This function is called on the I/O thread, prevent changes to
596  // bloom filter and caches.
597  base::AutoLock locked(lookup_lock_);
598
599  if (!browse_bloom_filter_.get())
600    return false;
601  DCHECK(prefix_set_.get());
602
603  // Used to double-check in case of a hit mis-match.
604  std::vector<SBPrefix> restored;
605
606  size_t miss_count = 0;
607  for (size_t i = 0; i < full_hashes.size(); ++i) {
608    bool found = prefix_set_->Exists(full_hashes[i].prefix);
609
610    if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
611      RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
612      if (found)
613        RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
614      prefix_hits->push_back(full_hashes[i].prefix);
615      if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
616        ++miss_count;
617    } else {
618      // Bloom filter misses should never be in prefix set.  Re-create
619      // the original prefixes and manually search for it, to check if
620      // there's a bug with how |Exists()| is implemented.
621      // |UpdateBrowseStore()| previously verified that
622      // |GetPrefixes()| returns the same prefixes as were passed to
623      // the constructor.
624      DCHECK(!found);
625      if (found) {
626        if (restored.empty())
627          prefix_set_->GetPrefixes(&restored);
628
629        // If the item is not in the re-created list, then there is an
630        // error in |PrefixSet::Exists()|.  If the item is in the
631        // re-created list, then the bloom filter was wrong.
632        if (std::binary_search(restored.begin(), restored.end(),
633                               full_hashes[i].prefix)) {
634          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
635        } else {
636          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
637        }
638      }
639    }
640  }
641
642  // If all the prefixes are cached as 'misses', don't issue a GetHash.
643  if (miss_count == prefix_hits->size())
644    return false;
645
646  // Find the matching full-hash results.  |full_browse_hashes_| are from the
647  // database, |pending_browse_hashes_| are from GetHash requests between
648  // updates.
649  std::sort(prefix_hits->begin(), prefix_hits->end());
650
651  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
652                               full_hits, last_update);
653  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
654                               full_hits, last_update);
655  return true;
656}
657
658bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes(
659    int list_bit,
660    const std::vector<SBPrefix>& prefixes,
661    std::vector<SBPrefix>* prefix_hits) {
662  prefix_hits->clear();
663
664  std::vector<SBAddPrefix> add_prefixes;
665  download_store_->GetAddPrefixes(&add_prefixes);
666  for (size_t i = 0; i < add_prefixes.size(); ++i) {
667    for (size_t j = 0; j < prefixes.size(); ++j) {
668      const SBPrefix& prefix = prefixes[j];
669      if (prefix == add_prefixes[i].prefix &&
670          GetListIdBit(add_prefixes[i].chunk_id) == list_bit) {
671        prefix_hits->push_back(prefix);
672      }
673    }
674  }
675  return !prefix_hits->empty();
676}
677
678bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
679    const std::vector<GURL>& urls,
680    std::vector<SBPrefix>* prefix_hits) {
681  DCHECK_EQ(creation_loop_, MessageLoop::current());
682
683  // Ignore this check when download checking is not enabled.
684  if (!download_store_.get())
685    return false;
686
687  std::vector<SBPrefix> prefixes;
688  GetDownloadUrlPrefixes(urls, &prefixes);
689  return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2,
690                                  prefixes,
691                                  prefix_hits);
692}
693
694bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
695    const SBPrefix& prefix) {
696  DCHECK_EQ(creation_loop_, MessageLoop::current());
697
698  // Ignore this check when download store is not available.
699  if (!download_store_.get())
700    return false;
701
702  std::vector<SBPrefix> prefixes(1, prefix);
703  std::vector<SBPrefix> prefix_hits;
704  return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2,
705                                  prefixes,
706                                  &prefix_hits);
707}
708
709bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
710  // This method is theoretically thread-safe but we expect all calls to
711  // originate from the IO thread.
712  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
713  base::AutoLock l(lookup_lock_);
714  if (csd_whitelist_all_urls_)
715    return true;
716
717  std::vector<SBFullHash> full_hashes;
718  BrowseFullHashesToCheck(url, true, &full_hashes);
719  for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin();
720       it != full_hashes.end(); ++it) {
721    if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it))
722      return true;
723  }
724  return false;
725}
726
727// Helper to insert entries for all of the prefixes or full hashes in
728// |entry| into the store.
729void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
730                                        const SBEntry* entry, int list_id) {
731  DCHECK_EQ(creation_loop_, MessageLoop::current());
732
733  SafeBrowsingStore* store = GetStore(list_id);
734  if (!store) return;
735
736  STATS_COUNTER("SB.HostInsert", 1);
737  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
738  const int count = entry->prefix_count();
739
740  DCHECK(!entry->IsSub());
741  if (!count) {
742    // No prefixes, use host instead.
743    STATS_COUNTER("SB.PrefixAdd", 1);
744    store->WriteAddPrefix(encoded_chunk_id, host);
745  } else if (entry->IsPrefix()) {
746    // Prefixes only.
747    for (int i = 0; i < count; i++) {
748      const SBPrefix prefix = entry->PrefixAt(i);
749      STATS_COUNTER("SB.PrefixAdd", 1);
750      store->WriteAddPrefix(encoded_chunk_id, prefix);
751    }
752  } else {
753    // Prefixes and hashes.
754    const base::Time receive_time = base::Time::Now();
755    for (int i = 0; i < count; ++i) {
756      const SBFullHash full_hash = entry->FullHashAt(i);
757      const SBPrefix prefix = full_hash.prefix;
758
759      STATS_COUNTER("SB.PrefixAdd", 1);
760      store->WriteAddPrefix(encoded_chunk_id, prefix);
761
762      STATS_COUNTER("SB.PrefixAddFull", 1);
763      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
764    }
765  }
766}
767
768// Helper to iterate over all the entries in the hosts in |chunks| and
769// add them to the store.
770void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id,
771                                              const SBChunkList& chunks) {
772  DCHECK_EQ(creation_loop_, MessageLoop::current());
773
774  SafeBrowsingStore* store = GetStore(list_id);
775  if (!store) return;
776
777  for (SBChunkList::const_iterator citer = chunks.begin();
778       citer != chunks.end(); ++citer) {
779    const int chunk_id = citer->chunk_number;
780
781    // The server can give us a chunk that we already have because
782    // it's part of a range.  Don't add it again.
783    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
784    if (store->CheckAddChunk(encoded_chunk_id))
785      continue;
786
787    store->SetAddChunk(encoded_chunk_id);
788    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
789         hiter != citer->hosts.end(); ++hiter) {
790      // NOTE: Could pass |encoded_chunk_id|, but then inserting add
791      // chunks would look different from inserting sub chunks.
792      InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
793    }
794  }
795}
796
797// Helper to insert entries for all of the prefixes or full hashes in
798// |entry| into the store.
799void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
800                                        const SBEntry* entry, int list_id) {
801  DCHECK_EQ(creation_loop_, MessageLoop::current());
802
803  SafeBrowsingStore* store = GetStore(list_id);
804  if (!store) return;
805
806  STATS_COUNTER("SB.HostDelete", 1);
807  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
808  const int count = entry->prefix_count();
809
810  DCHECK(entry->IsSub());
811  if (!count) {
812    // No prefixes, use host instead.
813    STATS_COUNTER("SB.PrefixSub", 1);
814    const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
815    store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
816  } else if (entry->IsPrefix()) {
817    // Prefixes only.
818    for (int i = 0; i < count; i++) {
819      const SBPrefix prefix = entry->PrefixAt(i);
820      const int add_chunk_id =
821          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
822
823      STATS_COUNTER("SB.PrefixSub", 1);
824      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
825    }
826  } else {
827    // Prefixes and hashes.
828    for (int i = 0; i < count; ++i) {
829      const SBFullHash full_hash = entry->FullHashAt(i);
830      const int add_chunk_id =
831          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
832
833      STATS_COUNTER("SB.PrefixSub", 1);
834      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
835
836      STATS_COUNTER("SB.PrefixSubFull", 1);
837      store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
838    }
839  }
840}
841
842// Helper to iterate over all the entries in the hosts in |chunks| and
843// add them to the store.
844void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id,
845                                              const SBChunkList& chunks) {
846  DCHECK_EQ(creation_loop_, MessageLoop::current());
847
848  SafeBrowsingStore* store = GetStore(list_id);
849  if (!store) return;
850
851  for (SBChunkList::const_iterator citer = chunks.begin();
852       citer != chunks.end(); ++citer) {
853    const int chunk_id = citer->chunk_number;
854
855    // The server can give us a chunk that we already have because
856    // it's part of a range.  Don't add it again.
857    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
858    if (store->CheckSubChunk(encoded_chunk_id))
859      continue;
860
861    store->SetSubChunk(encoded_chunk_id);
862    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
863         hiter != citer->hosts.end(); ++hiter) {
864      InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
865    }
866  }
867}
868
869void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
870                                           const SBChunkList& chunks) {
871  DCHECK_EQ(creation_loop_, MessageLoop::current());
872
873  if (corruption_detected_ || chunks.empty())
874    return;
875
876  const base::Time insert_start = base::Time::Now();
877
878  const int list_id = safe_browsing_util::GetListId(list_name);
879  DVLOG(2) << list_name << ": " << list_id;
880
881  SafeBrowsingStore* store = GetStore(list_id);
882  if (!store) return;
883
884  change_detected_ = true;
885
886  store->BeginChunk();
887  if (chunks.front().is_add) {
888    InsertAddChunks(list_id, chunks);
889  } else {
890    InsertSubChunks(list_id, chunks);
891  }
892  store->FinishChunk();
893
894  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start);
895}
896
897void SafeBrowsingDatabaseNew::DeleteChunks(
898    const std::vector<SBChunkDelete>& chunk_deletes) {
899  DCHECK_EQ(creation_loop_, MessageLoop::current());
900
901  if (corruption_detected_ || chunk_deletes.empty())
902    return;
903
904  const std::string& list_name = chunk_deletes.front().list_name;
905  const int list_id = safe_browsing_util::GetListId(list_name);
906
907  SafeBrowsingStore* store = GetStore(list_id);
908  if (!store) return;
909
910  change_detected_ = true;
911
912  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
913    std::vector<int> chunk_numbers;
914    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
915    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
916      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
917      if (chunk_deletes[i].is_sub_del)
918        store->DeleteSubChunk(encoded_chunk_id);
919      else
920        store->DeleteAddChunk(encoded_chunk_id);
921    }
922  }
923}
924
925void SafeBrowsingDatabaseNew::CacheHashResults(
926    const std::vector<SBPrefix>& prefixes,
927    const std::vector<SBFullHashResult>& full_hits) {
928  // This is called on the I/O thread, lock against updates.
929  base::AutoLock locked(lookup_lock_);
930
931  if (full_hits.empty()) {
932    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
933    return;
934  }
935
936  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
937  // Refactor to make them identical.
938  const base::Time now = base::Time::Now();
939  const size_t orig_size = pending_browse_hashes_.size();
940  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
941       iter != full_hits.end(); ++iter) {
942    const int list_id = safe_browsing_util::GetListId(iter->list_name);
943    if (list_id == safe_browsing_util::MALWARE ||
944        list_id == safe_browsing_util::PHISH) {
945      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
946      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
947      pending_browse_hashes_.push_back(add_full_hash);
948    }
949  }
950
951  // Sort new entries then merge with the previously-sorted entries.
952  std::vector<SBAddFullHash>::iterator
953      orig_end = pending_browse_hashes_.begin() + orig_size;
954  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
955  std::inplace_merge(pending_browse_hashes_.begin(),
956                     orig_end, pending_browse_hashes_.end(),
957                     SBAddFullHashPrefixLess);
958}
959
960bool SafeBrowsingDatabaseNew::UpdateStarted(
961    std::vector<SBListChunkRanges>* lists) {
962  DCHECK_EQ(creation_loop_, MessageLoop::current());
963  DCHECK(lists);
964
965  // If |BeginUpdate()| fails, reset the database.
966  if (!browse_store_->BeginUpdate()) {
967    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
968    HandleCorruptDatabase();
969    return false;
970  }
971
972  if (download_store_.get() && !download_store_->BeginUpdate()) {
973    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
974    HandleCorruptDatabase();
975    return false;
976  }
977
978  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
979    RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN);
980    HandleCorruptDatabase();
981    return false;
982  }
983
984  std::vector<std::string> browse_listnames;
985  browse_listnames.push_back(safe_browsing_util::kMalwareList);
986  browse_listnames.push_back(safe_browsing_util::kPhishingList);
987  UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);
988
989  if (download_store_.get()) {
990    std::vector<std::string> download_listnames;
991    download_listnames.push_back(safe_browsing_util::kBinUrlList);
992    download_listnames.push_back(safe_browsing_util::kBinHashList);
993    UpdateChunkRanges(download_store_.get(), download_listnames, lists);
994  }
995
996  if (csd_whitelist_store_.get()) {
997    std::vector<std::string> csd_whitelist_listnames;
998    csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
999    UpdateChunkRanges(csd_whitelist_store_.get(),
1000                      csd_whitelist_listnames, lists);
1001  }
1002
1003  corruption_detected_ = false;
1004  change_detected_ = false;
1005  return true;
1006}
1007
1008void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1009  DCHECK_EQ(creation_loop_, MessageLoop::current());
1010  if (corruption_detected_)
1011    return;
1012
1013  // Unroll the transaction if there was a protocol error or if the
1014  // transaction was empty.  This will leave the bloom filter, the
1015  // pending hashes, and the prefix miss cache in place.
1016  if (!update_succeeded || !change_detected_) {
1017    // Track empty updates to answer questions at http://crbug.com/72216 .
1018    if (update_succeeded && !change_detected_)
1019      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1020    browse_store_->CancelUpdate();
1021    if (download_store_.get())
1022      download_store_->CancelUpdate();
1023    if (csd_whitelist_store_.get())
1024      csd_whitelist_store_->CancelUpdate();
1025    return;
1026  }
1027
1028  // for download
1029  UpdateDownloadStore();
1030  // for browsing
1031  UpdateBrowseStore();
1032  // for csd whitelist
1033  UpdateCsdWhitelistStore();
1034}
1035
1036void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() {
1037  if (!csd_whitelist_store_.get())
1038    return;
1039
1040  // For the csd whitelist, we don't cache and save full hashes since all
1041  // hashes are already full.
1042  std::vector<SBAddFullHash> empty_add_hashes;
1043
1044  // Not needed for the csd whitelist.
1045  std::set<SBPrefix> empty_miss_cache;
1046
1047  // Note: prefixes will not be empty.  The current data store implementation
1048  // stores all full-length hashes as both full and prefix hashes.
1049  std::vector<SBAddPrefix> prefixes;
1050  std::vector<SBAddFullHash> full_hashes;
1051  if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes,
1052                                          empty_miss_cache,
1053                                          &prefixes,
1054                                          &full_hashes)) {
1055    RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH);
1056    CsdWhitelistAllUrls();
1057    return;
1058  }
1059  LoadCsdWhitelist(full_hashes);
1060}
1061
1062void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
1063  if (!download_store_.get())
1064    return;
1065
1066  // For download, we don't cache and save full hashes.
1067  std::vector<SBAddFullHash> empty_add_hashes;
1068
1069  // For download, backend lookup happens only if a prefix is in add list.
1070  // No need to pass in miss cache when call FinishUpdate to caculate
1071  // bloomfilter false positives.
1072  std::set<SBPrefix> empty_miss_cache;
1073
1074  // These results are not used after this call. Simply ignore the
1075  // returned value after FinishUpdate(...).
1076  std::vector<SBAddPrefix> add_prefixes_result;
1077  std::vector<SBAddFullHash> add_full_hashes_result;
1078
1079  if (!download_store_->FinishUpdate(empty_add_hashes,
1080                                     empty_miss_cache,
1081                                     &add_prefixes_result,
1082                                     &add_full_hashes_result))
1083    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1084  return;
1085}
1086
1087void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1088  // Copy out the pending add hashes.  Copy rather than swapping in
1089  // case |ContainsBrowseURL()| is called before the new filter is complete.
1090  std::vector<SBAddFullHash> pending_add_hashes;
1091  {
1092    base::AutoLock locked(lookup_lock_);
1093    pending_add_hashes.insert(pending_add_hashes.end(),
1094                              pending_browse_hashes_.begin(),
1095                              pending_browse_hashes_.end());
1096  }
1097
1098  // Measure the amount of IO during the bloom filter build.
1099  base::IoCounters io_before, io_after;
1100  base::ProcessHandle handle = base::Process::Current().handle();
1101  scoped_ptr<base::ProcessMetrics> metric(
1102#if !defined(OS_MACOSX)
1103      base::ProcessMetrics::CreateProcessMetrics(handle)
1104#else
1105      // Getting stats only for the current process is enough, so NULL is fine.
1106      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1107#endif
1108  );
1109
1110  // IoCounters are currently not supported on Mac, and may not be
1111  // available for Linux, so we check the result and only show IO
1112  // stats if they are available.
1113  const bool got_counters = metric->GetIOCounters(&io_before);
1114
1115  const base::Time before = base::Time::Now();
1116
1117  std::vector<SBAddPrefix> add_prefixes;
1118  std::vector<SBAddFullHash> add_full_hashes;
1119  if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
1120                                   &add_prefixes, &add_full_hashes)) {
1121    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1122    return;
1123  }
1124
1125  // Create and populate |filter| from |add_prefixes|.
1126  // TODO(shess): The bloom filter doesn't need to be a
1127  // scoped_refptr<> for this code.  Refactor that away.
1128  const int filter_size =
1129      BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
1130  scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
1131  for (size_t i = 0; i < add_prefixes.size(); ++i) {
1132    filter->Insert(add_prefixes[i].prefix);
1133  }
1134
1135  scoped_ptr<safe_browsing::PrefixSet>
1136      prefix_set(PrefixSetFromAddPrefixes(add_prefixes));
1137
1138  // This needs to be in sorted order by prefix for efficient access.
1139  std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1140            SBAddFullHashPrefixLess);
1141
1142  // Swap in the newly built filter and cache.
1143  {
1144    base::AutoLock locked(lookup_lock_);
1145    full_browse_hashes_.swap(add_full_hashes);
1146
1147    // TODO(shess): If |CacheHashResults()| is posted between the
1148    // earlier lock and this clear, those pending hashes will be lost.
1149    // It could be fixed by only removing hashes which were collected
1150    // at the earlier point.  I believe that is fail-safe as-is (the
1151    // hash will be fetched again).
1152    pending_browse_hashes_.clear();
1153    prefix_miss_cache_.clear();
1154    browse_bloom_filter_.swap(filter);
1155    prefix_set_.swap(prefix_set);
1156  }
1157
1158  const base::TimeDelta bloom_gen = base::Time::Now() - before;
1159
1160  // Persist the bloom filter to disk.  Since only this thread changes
1161  // |browse_bloom_filter_|, there is no need to lock.
1162  WriteBloomFilter();
1163
1164  // Gather statistics.
1165  if (got_counters && metric->GetIOCounters(&io_after)) {
1166    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1167                         static_cast<int>(io_after.ReadTransferCount -
1168                                          io_before.ReadTransferCount) / 1024);
1169    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1170                         static_cast<int>(io_after.WriteTransferCount -
1171                                          io_before.WriteTransferCount) / 1024);
1172    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1173                         static_cast<int>(io_after.ReadOperationCount -
1174                                          io_before.ReadOperationCount));
1175    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1176                         static_cast<int>(io_after.WriteOperationCount -
1177                                          io_before.WriteOperationCount));
1178  }
1179  DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in "
1180           << bloom_gen.InMilliseconds() << " ms total.  prefix count: "
1181           << add_prefixes.size();
1182  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
1183  UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes",
1184                       browse_bloom_filter_->size() / 1024);
1185  int64 size_64;
1186  if (file_util::GetFileSize(browse_filename_, &size_64))
1187    UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1188                         static_cast<int>(size_64 / 1024));
1189  if (file_util::GetFileSize(download_filename_, &size_64))
1190    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1191                         static_cast<int>(size_64 / 1024));
1192}
1193
1194void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1195  // Reset the database after the current task has unwound (but only
1196  // reset once within the scope of a given task).
1197  if (reset_factory_.empty()) {
1198    RecordFailure(FAILURE_DATABASE_CORRUPT);
1199    MessageLoop::current()->PostTask(FROM_HERE,
1200        reset_factory_.NewRunnableMethod(
1201            &SafeBrowsingDatabaseNew::OnHandleCorruptDatabase));
1202  }
1203}
1204
1205void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1206  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1207  corruption_detected_ = true;  // Stop updating the database.
1208  ResetDatabase();
1209  DCHECK(false) << "SafeBrowsing database was corrupt and reset";
1210}
1211
1212// TODO(shess): I'm not clear why this code doesn't have any
1213// real error-handling.
1214void SafeBrowsingDatabaseNew::LoadBloomFilter() {
1215  DCHECK_EQ(creation_loop_, MessageLoop::current());
1216  DCHECK(!bloom_filter_filename_.empty());
1217
1218  // If we're missing either of the database or filter files, we wait until the
1219  // next update to generate a new filter.
1220  // TODO(paulg): Investigate how often the filter file is missing and how
1221  // expensive it would be to regenerate it.
1222  int64 size_64;
1223  if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
1224    return;
1225
1226  if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
1227      size_64 == 0) {
1228    RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
1229    return;
1230  }
1231
1232  const base::TimeTicks before = base::TimeTicks::Now();
1233  browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
1234  DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
1235           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1236
1237  if (!browse_bloom_filter_.get())
1238    RecordFailure(FAILURE_DATABASE_FILTER_READ);
1239
1240  // Manually re-generate the prefix set from the main database.
1241  // TODO(shess): Write/read for prefix set.
1242  std::vector<SBAddPrefix> add_prefixes;
1243  browse_store_->GetAddPrefixes(&add_prefixes);
1244  prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes));
1245}
1246
1247bool SafeBrowsingDatabaseNew::Delete() {
1248  DCHECK_EQ(creation_loop_, MessageLoop::current());
1249
1250  const bool r1 = browse_store_->Delete();
1251  if (!r1)
1252    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1253
1254  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1255  if (!r2)
1256    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1257
1258  const bool r3 = csd_whitelist_store_.get() ?
1259      csd_whitelist_store_->Delete() : true;
1260  if (!r3)
1261    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1262
1263  const bool r4 = file_util::Delete(bloom_filter_filename_, false);
1264  if (!r4)
1265    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1266  return r1 && r2 && r3 && r4;
1267}
1268
1269void SafeBrowsingDatabaseNew::WriteBloomFilter() {
1270  DCHECK_EQ(creation_loop_, MessageLoop::current());
1271
1272  if (!browse_bloom_filter_.get())
1273    return;
1274
1275  const base::TimeTicks before = base::TimeTicks::Now();
1276  const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
1277  DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
1278           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1279
1280  if (!write_ok)
1281    RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
1282}
1283
1284void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() {
1285  base::AutoLock locked(lookup_lock_);
1286  csd_whitelist_all_urls_ = true;
1287  csd_whitelist_.clear();
1288}
1289
1290void SafeBrowsingDatabaseNew::LoadCsdWhitelist(
1291    const std::vector<SBAddFullHash>& full_hashes) {
1292  DCHECK_EQ(creation_loop_, MessageLoop::current());
1293  if (full_hashes.size() > kMaxCsdWhitelistSize) {
1294    CsdWhitelistAllUrls();
1295    return;
1296  }
1297
1298  std::vector<SBFullHash> new_csd_whitelist;
1299  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1300       it != full_hashes.end(); ++it) {
1301    new_csd_whitelist.push_back(it->full_hash);
1302  }
1303  std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end());
1304
1305  SBFullHash kill_switch;
1306  crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch,
1307                           sizeof(kill_switch));
1308  if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(),
1309                         kill_switch)) {
1310    // The kill switch is whitelisted hence we whitelist all URLs.
1311    CsdWhitelistAllUrls();
1312  } else {
1313    base::AutoLock locked(lookup_lock_);
1314    csd_whitelist_all_urls_ = false;
1315    csd_whitelist_.swap(new_csd_whitelist);
1316  }
1317}
1318