safe_browsing_database.cc revision bbcdd45c55eb7c4641ab97aef9889b0fc828e7d3
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7#include <algorithm>
8#include <iterator>
9
10#include "base/bind.h"
11#include "base/file_util.h"
12#include "base/message_loop/message_loop.h"
13#include "base/metrics/histogram.h"
14#include "base/metrics/stats_counters.h"
15#include "base/process/process_metrics.h"
16#include "base/time/time.h"
17#include "chrome/browser/safe_browsing/prefix_set.h"
18#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
19#include "content/public/browser/browser_thread.h"
20#include "crypto/sha2.h"
21#include "url/gurl.h"
22
23#if defined(OS_MACOSX)
24#include "base/mac/mac_util.h"
25#endif
26
27using content::BrowserThread;
28
29namespace {
30
// Filename suffix for the bloom filter.  Appended to a store filename by
// SafeBrowsingDatabase::BloomFilterForFilename().
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.  Appended to a store filename by
// SafeBrowsingDatabase::PrefixSetForFilename().
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.  Appended to the base filename by
// SafeBrowsingDatabase::DownloadDBFilename().
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
// Appended by SafeBrowsingDatabase::CsdWhitelistDBFilename().
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.  Appended by
// SafeBrowsingDatabase::DownloadWhitelistDBFilename().
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.  Appended by
// SafeBrowsingDatabase::ExtensionBlacklistDBFilename().
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.  Appended by
// SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename().
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for browse store.  Appended to the base filename by
// SafeBrowsingDatabase::BrowseDBFilename().
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit.  If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
59
// The maximum staleness for a cached entry.  Used by
// GetCachedFullHashesForBrowse() to compute the expiry cut-off relative to
// the current time.
const int kMaxStalenessMinutes = 45;

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries then all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!
72
// To save space, the incoming |chunk_id| and |list_id| are packed into a
// single |encoded_chunk_id| for storage: the chunk number is shifted up by
// one bit and the parity of the list id is kept in the lowest bit (see
// EncodeChunkId()).  The helpers below unpack that value.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.

// Returns the list-id bit (0 or 1) held in the lowest bit of
// |encoded_chunk_id|.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Returns the chunk number carried by |encoded_chunk_id|, i.e. the value
// with its low-order list-id bit shifted away.
int DecodeChunkId(int encoded_chunk_id) {
  const int kListIdBitCount = 1;
  return encoded_chunk_id >> kListIdBitCount;
}
85int EncodeChunkId(const int chunk, const int list_id) {
86  DCHECK_NE(list_id, safe_browsing_util::INVALID);
87  return chunk << 1 | list_id % 2;
88}
89
90// Generate the set of full hashes to check for |url|.  If
91// |include_whitelist_hashes| is true we will generate additional path-prefixes
92// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
93// whitelist it should also match /foo/bar which is not the case for all the
94// other lists.  We'll also always add a pattern for the empty path.
95// TODO(shess): This function is almost the same as
96// |CompareFullHashes()| in safe_browsing_util.cc, except that code
97// does an early exit on match.  Since match should be the infrequent
98// case (phishing or malware found), consider combining this function
99// with that one.
100void BrowseFullHashesToCheck(const GURL& url,
101                             bool include_whitelist_hashes,
102                             std::vector<SBFullHash>* full_hashes) {
103  std::vector<std::string> hosts;
104  if (url.HostIsIPAddress()) {
105    hosts.push_back(url.host());
106  } else {
107    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
108  }
109
110  std::vector<std::string> paths;
111  safe_browsing_util::GeneratePathsToCheck(url, &paths);
112
113  for (size_t i = 0; i < hosts.size(); ++i) {
114    for (size_t j = 0; j < paths.size(); ++j) {
115      const std::string& path = paths[j];
116      SBFullHash full_hash;
117      crypto::SHA256HashString(hosts[i] + path, &full_hash,
118                               sizeof(full_hash));
119      full_hashes->push_back(full_hash);
120
121      // We may have /foo as path-prefix in the whitelist which should
122      // also match with /foo/bar and /foo?bar.  Hence, for every path
123      // that ends in '/' we also add the path without the slash.
124      if (include_whitelist_hashes &&
125          path.size() > 1 &&
126          path[path.size() - 1] == '/') {
127        crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
128                                 &full_hash, sizeof(full_hash));
129        full_hashes->push_back(full_hash);
130      }
131    }
132  }
133}
134
135// Get the prefixes matching the download |urls|.
136void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
137                            std::vector<SBPrefix>* prefixes) {
138  std::vector<SBFullHash> full_hashes;
139  for (size_t i = 0; i < urls.size(); ++i)
140    BrowseFullHashesToCheck(urls[i], false, &full_hashes);
141
142  for (size_t i = 0; i < full_hashes.size(); ++i)
143    prefixes->push_back(full_hashes[i].prefix);
144}
145
146// Helper function to compare addprefixes in |store| with |prefixes|.
147// The |list_bit| indicates which list (url or hash) to compare.
148//
149// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
150// the actual matching prefixes.
151bool MatchAddPrefixes(SafeBrowsingStore* store,
152                      int list_bit,
153                      const std::vector<SBPrefix>& prefixes,
154                      std::vector<SBPrefix>* prefix_hits) {
155  prefix_hits->clear();
156  bool found_match = false;
157
158  SBAddPrefixes add_prefixes;
159  store->GetAddPrefixes(&add_prefixes);
160  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
161       iter != add_prefixes.end(); ++iter) {
162    for (size_t j = 0; j < prefixes.size(); ++j) {
163      const SBPrefix& prefix = prefixes[j];
164      if (prefix == iter->prefix &&
165          GetListIdBit(iter->chunk_id) == list_bit) {
166        prefix_hits->push_back(prefix);
167        found_match = true;
168      }
169    }
170  }
171  return found_match;
172}
173
174// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
175// add them to |full_hits| if not expired.  "Not expired" is when
176// either |last_update| was recent enough, or the item has been
177// received recently enough.  Expired items are not deleted because a
178// future update may make them acceptable again.
179//
180// For efficiency reasons the code walks |prefix_hits| and
181// |full_hashes| in parallel, so they must be sorted by prefix.
182void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
183                                  const std::vector<SBAddFullHash>& full_hashes,
184                                  std::vector<SBFullHashResult>* full_hits,
185                                  base::Time last_update) {
186  const base::Time expire_time =
187      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
188
189  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
190  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
191
192  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
193    if (*piter < hiter->full_hash.prefix) {
194      ++piter;
195    } else if (hiter->full_hash.prefix < *piter) {
196      ++hiter;
197    } else {
198      if (expire_time < last_update ||
199          expire_time.ToTimeT() < hiter->received) {
200        SBFullHashResult result;
201        const int list_bit = GetListIdBit(hiter->chunk_id);
202        DCHECK(list_bit == safe_browsing_util::MALWARE ||
203               list_bit == safe_browsing_util::PHISH);
204        const safe_browsing_util::ListType list_id =
205            static_cast<safe_browsing_util::ListType>(list_bit);
206        if (!safe_browsing_util::GetListName(list_id, &result.list_name))
207          continue;
208        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
209        result.hash = hiter->full_hash;
210        full_hits->push_back(result);
211      }
212
213      // Only increment |hiter|, |piter| might have multiple hits.
214      ++hiter;
215    }
216  }
217}
218
219// This function generates a chunk range string for |chunks|. It
220// outputs one chunk range string per list and writes it to the
221// |list_ranges| vector.  We expect |list_ranges| to already be of the
222// right size.  E.g., if |chunks| contains chunks with two different
223// list ids then |list_ranges| must contain two elements.
224void GetChunkRanges(const std::vector<int>& chunks,
225                    std::vector<std::string>* list_ranges) {
226  DCHECK_GT(list_ranges->size(), 0U);
227  DCHECK_LE(list_ranges->size(), 2U);
228  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
229  for (std::vector<int>::const_iterator iter = chunks.begin();
230       iter != chunks.end(); ++iter) {
231    int mod_list_id = GetListIdBit(*iter);
232    DCHECK_GE(mod_list_id, 0);
233    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
234    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
235  }
236  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
237    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
238  }
239}
240
241// Helper function to create chunk range lists for Browse related
242// lists.
243void UpdateChunkRanges(SafeBrowsingStore* store,
244                       const std::vector<std::string>& listnames,
245                       std::vector<SBListChunkRanges>* lists) {
246  DCHECK_GT(listnames.size(), 0U);
247  DCHECK_LE(listnames.size(), 2U);
248  std::vector<int> add_chunks;
249  std::vector<int> sub_chunks;
250  store->GetAddChunks(&add_chunks);
251  store->GetSubChunks(&sub_chunks);
252
253  std::vector<std::string> adds(listnames.size());
254  std::vector<std::string> subs(listnames.size());
255  GetChunkRanges(add_chunks, &adds);
256  GetChunkRanges(sub_chunks, &subs);
257
258  for (size_t i = 0; i < listnames.size(); ++i) {
259    const std::string& listname = listnames[i];
260    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
261              static_cast<int>(i % 2));
262    DCHECK_NE(safe_browsing_util::GetListId(listname),
263              safe_browsing_util::INVALID);
264    lists->push_back(SBListChunkRanges(listname));
265    lists->back().adds.swap(adds[i]);
266    lists->back().subs.swap(subs[i]);
267  }
268}
269
270// Helper for deleting chunks left over from obsolete lists.
271void DeleteChunksFromStore(SafeBrowsingStore* store, int listid){
272  std::vector<int> add_chunks;
273  size_t adds_deleted = 0;
274  store->GetAddChunks(&add_chunks);
275  for (std::vector<int>::const_iterator iter = add_chunks.begin();
276       iter != add_chunks.end(); ++iter) {
277    if (GetListIdBit(*iter) == GetListIdBit(listid)) {
278      adds_deleted++;
279      store->DeleteAddChunk(*iter);
280    }
281  }
282  if (adds_deleted > 0)
283    UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashAddsDeleted", adds_deleted);
284
285  std::vector<int> sub_chunks;
286  size_t subs_deleted = 0;
287  store->GetSubChunks(&sub_chunks);
288  for (std::vector<int>::const_iterator iter = sub_chunks.begin();
289       iter != sub_chunks.end(); ++iter) {
290    if (GetListIdBit(*iter) == GetListIdBit(listid)) {
291      subs_deleted++;
292      store->DeleteSubChunk(*iter);
293    }
294  }
295  if (subs_deleted > 0)
296    UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted);
297}
298
299// Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
300// safe_browsing_store.h orders on both chunk-id and prefix.
301bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
302  return a.full_hash.prefix < b.full_hash.prefix;
303}
304
305// This code always checks for non-zero file size.  This helper makes
306// that less verbose.
307int64 GetFileSizeOrZero(const base::FilePath& file_path) {
308  int64 size_64;
309  if (!file_util::GetFileSize(file_path, &size_64))
310    return 0;
311  return size_64;
312}
313
314}  // namespace
315
316// The default SafeBrowsingDatabaseFactory.
317class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
318 public:
319  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
320      bool enable_download_protection,
321      bool enable_client_side_whitelist,
322      bool enable_download_whitelist,
323      bool enable_extension_blacklist,
324      bool enable_side_effect_free_whitelist) OVERRIDE {
325    return new SafeBrowsingDatabaseNew(
326        new SafeBrowsingStoreFile,
327        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
328        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
329        enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
330        enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
331        enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL);
332  }
333
334  SafeBrowsingDatabaseFactoryImpl() { }
335
336 private:
337  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
338};
339
// static
// Factory used by SafeBrowsingDatabase::Create().  Starts out NULL; Create()
// installs a SafeBrowsingDatabaseFactoryImpl the first time it finds no
// factory set.
SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
342
343// Factory method, non-thread safe. Caller has to make sure this s called
344// on SafeBrowsing Thread.
345// TODO(shess): There's no need for a factory any longer.  Convert
346// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
347// callers just construct things directly.
348SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
349    bool enable_download_protection,
350    bool enable_client_side_whitelist,
351    bool enable_download_whitelist,
352    bool enable_extension_blacklist,
353    bool enable_side_effect_free_whitelist) {
354  if (!factory_)
355    factory_ = new SafeBrowsingDatabaseFactoryImpl();
356  return factory_->CreateSafeBrowsingDatabase(
357      enable_download_protection,
358      enable_client_side_whitelist,
359      enable_download_whitelist,
360      enable_extension_blacklist,
361      enable_side_effect_free_whitelist);
362}
363
// Out-of-line destructor for the SafeBrowsingDatabase interface; it has
// nothing to release.
SafeBrowsingDatabase::~SafeBrowsingDatabase() {
}
366
367// static
368base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
369    const base::FilePath& db_base_filename) {
370  return base::FilePath(db_base_filename.value() + kBrowseDBFile);
371}
372
373// static
374base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
375    const base::FilePath& db_base_filename) {
376  return base::FilePath(db_base_filename.value() + kDownloadDBFile);
377}
378
379// static
380base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
381    const base::FilePath& db_filename) {
382  return base::FilePath(db_filename.value() + kBloomFilterFile);
383}
384
385// static
386base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
387    const base::FilePath& db_filename) {
388  return base::FilePath(db_filename.value() + kPrefixSetFile);
389}
390
391// static
392base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
393    const base::FilePath& db_filename) {
394  return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
395}
396
397// static
398base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
399    const base::FilePath& db_filename) {
400  return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
401}
402
403// static
404base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
405    const base::FilePath& db_filename) {
406  return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
407}
408
409// static
410base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
411    const base::FilePath& db_filename) {
412  return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
413}
414
415SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
416  if (list_id == safe_browsing_util::PHISH ||
417      list_id == safe_browsing_util::MALWARE) {
418    return browse_store_.get();
419  } else if (list_id == safe_browsing_util::BINURL ||
420             list_id == safe_browsing_util::BINHASH) {
421    return download_store_.get();
422  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
423    return csd_whitelist_store_.get();
424  } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
425    return download_whitelist_store_.get();
426  } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
427    return extension_blacklist_store_.get();
428  } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
429    return side_effect_free_whitelist_store_.get();
430  }
431  return NULL;
432}
433
// static
// Reports |failure_type| to the "SB2.DatabaseFailure" enumeration histogram,
// using FAILURE_DATABASE_MAX as the histogram's boundary value.
void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
                            FAILURE_DATABASE_MAX);
}
439
// Default constructor: creates a database that has only a file-backed
// browse store.  All optional stores (download, whitelists, extension
// blacklist) are left unset, which the DCHECKs below verify.  The message
// loop current at construction is remembered in |creation_loop_|.
SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    : creation_loop_(base::MessageLoop::current()),
      browse_store_(new SafeBrowsingStoreFile),
      reset_factory_(this),
      corruption_detected_(false),
      change_detected_(false) {
  DCHECK(browse_store_.get());
  DCHECK(!download_store_.get());
  DCHECK(!csd_whitelist_store_.get());
  DCHECK(!download_whitelist_store_.get());
  DCHECK(!extension_blacklist_store_.get());
  DCHECK(!side_effect_free_whitelist_store_.get());
}
453
454SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
455    SafeBrowsingStore* browse_store,
456    SafeBrowsingStore* download_store,
457    SafeBrowsingStore* csd_whitelist_store,
458    SafeBrowsingStore* download_whitelist_store,
459    SafeBrowsingStore* extension_blacklist_store,
460    SafeBrowsingStore* side_effect_free_whitelist_store)
461    : creation_loop_(base::MessageLoop::current()),
462      browse_store_(browse_store),
463      download_store_(download_store),
464      csd_whitelist_store_(csd_whitelist_store),
465      download_whitelist_store_(download_whitelist_store),
466      extension_blacklist_store_(extension_blacklist_store),
467      side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
468      reset_factory_(this),
469      corruption_detected_(false) {
470  DCHECK(browse_store_.get());
471}
472
// The database must be destroyed on the same message loop that created it.
SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
}
476
477void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
478  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
479  // Ensure we haven't been run before.
480  DCHECK(browse_filename_.empty());
481  DCHECK(download_filename_.empty());
482  DCHECK(csd_whitelist_filename_.empty());
483  DCHECK(download_whitelist_filename_.empty());
484  DCHECK(extension_blacklist_filename_.empty());
485  DCHECK(side_effect_free_whitelist_filename_.empty());
486
487  browse_filename_ = BrowseDBFilename(filename_base);
488  browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
489
490  browse_store_->Init(
491      browse_filename_,
492      base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
493                 base::Unretained(this)));
494  DVLOG(1) << "Init browse store: " << browse_filename_.value();
495
496  {
497    // NOTE: There is no need to grab the lock in this function, since
498    // until it returns, there are no pointers to this class on other
499    // threads.  Then again, that means there is no possibility of
500    // contention on the lock...
501    base::AutoLock locked(lookup_lock_);
502    full_browse_hashes_.clear();
503    pending_browse_hashes_.clear();
504    LoadPrefixSet();
505  }
506
507  if (download_store_.get()) {
508    download_filename_ = DownloadDBFilename(filename_base);
509    download_store_->Init(
510        download_filename_,
511        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
512                   base::Unretained(this)));
513    DVLOG(1) << "Init download store: " << download_filename_.value();
514  }
515
516  if (csd_whitelist_store_.get()) {
517    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
518    csd_whitelist_store_->Init(
519        csd_whitelist_filename_,
520        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
521                   base::Unretained(this)));
522    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
523    std::vector<SBAddFullHash> full_hashes;
524    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
525      LoadWhitelist(full_hashes, &csd_whitelist_);
526    } else {
527      WhitelistEverything(&csd_whitelist_);
528    }
529  } else {
530    WhitelistEverything(&csd_whitelist_);  // Just to be safe.
531  }
532
533  if (download_whitelist_store_.get()) {
534    download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
535    download_whitelist_store_->Init(
536        download_whitelist_filename_,
537        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
538                   base::Unretained(this)));
539    DVLOG(1) << "Init download whitelist store: "
540             << download_whitelist_filename_.value();
541    std::vector<SBAddFullHash> full_hashes;
542    if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
543      LoadWhitelist(full_hashes, &download_whitelist_);
544    } else {
545      WhitelistEverything(&download_whitelist_);
546    }
547  } else {
548    WhitelistEverything(&download_whitelist_);  // Just to be safe.
549  }
550
551  if (extension_blacklist_store_.get()) {
552    extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
553    extension_blacklist_store_->Init(
554        extension_blacklist_filename_,
555        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
556                   base::Unretained(this)));
557    DVLOG(1) << "Init extension blacklist store: "
558             << extension_blacklist_filename_.value();
559  }
560
561  if (side_effect_free_whitelist_store_.get()) {
562    side_effect_free_whitelist_filename_ =
563        SideEffectFreeWhitelistDBFilename(filename_base);
564    side_effect_free_whitelist_prefix_set_filename_ =
565        PrefixSetForFilename(side_effect_free_whitelist_filename_);
566    side_effect_free_whitelist_store_->Init(
567        side_effect_free_whitelist_filename_,
568        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
569                   base::Unretained(this)));
570    DVLOG(1) << "Init side-effect free whitelist store: "
571             << side_effect_free_whitelist_filename_.value();
572
573    // If there is no database, the filter cannot be used.
574    base::PlatformFileInfo db_info;
575    if (file_util::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
576        && db_info.size != 0) {
577      const base::TimeTicks before = base::TimeTicks::Now();
578      side_effect_free_whitelist_prefix_set_.reset(
579          safe_browsing::PrefixSet::LoadFile(
580              side_effect_free_whitelist_prefix_set_filename_));
581      DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
582               << "prefix set in "
583               << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
584      UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
585                          base::TimeTicks::Now() - before);
586      if (!side_effect_free_whitelist_prefix_set_.get())
587        RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
588    }
589  } else {
590    // Delete any files of the side-effect free sidelist that may be around
591    // from when it was previously enabled.
592    SafeBrowsingStoreFile::DeleteStore(
593        SideEffectFreeWhitelistDBFilename(filename_base));
594  }
595}
596
597bool SafeBrowsingDatabaseNew::ResetDatabase() {
598  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
599
600  // Delete files on disk.
601  // TODO(shess): Hard to see where one might want to delete without a
602  // reset.  Perhaps inline |Delete()|?
603  if (!Delete())
604    return false;
605
606  // Reset objects in memory.
607  {
608    base::AutoLock locked(lookup_lock_);
609    full_browse_hashes_.clear();
610    pending_browse_hashes_.clear();
611    prefix_miss_cache_.clear();
612    browse_prefix_set_.reset();
613    side_effect_free_whitelist_prefix_set_.reset();
614  }
615  // Wants to acquire the lock itself.
616  WhitelistEverything(&csd_whitelist_);
617  WhitelistEverything(&download_whitelist_);
618
619  return true;
620}
621
622// TODO(lzheng): Remove matching_list, it is not used anywhere.
623bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
624    const GURL& url,
625    std::string* matching_list,
626    std::vector<SBPrefix>* prefix_hits,
627    std::vector<SBFullHashResult>* full_hits,
628    base::Time last_update) {
629  // Clear the results first.
630  matching_list->clear();
631  prefix_hits->clear();
632  full_hits->clear();
633
634  std::vector<SBFullHash> full_hashes;
635  BrowseFullHashesToCheck(url, false, &full_hashes);
636  if (full_hashes.empty())
637    return false;
638
639  // This function is called on the I/O thread, prevent changes to
640  // filter and caches.
641  base::AutoLock locked(lookup_lock_);
642
643  // |browse_prefix_set_| is empty until it is either read from disk, or the
644  // first update populates it.  Bail out without a hit if not yet
645  // available.
646  if (!browse_prefix_set_.get())
647    return false;
648
649  size_t miss_count = 0;
650  for (size_t i = 0; i < full_hashes.size(); ++i) {
651    const SBPrefix prefix = full_hashes[i].prefix;
652    if (browse_prefix_set_->Exists(prefix)) {
653      prefix_hits->push_back(prefix);
654      if (prefix_miss_cache_.count(prefix) > 0)
655        ++miss_count;
656    }
657  }
658
659  // If all the prefixes are cached as 'misses', don't issue a GetHash.
660  if (miss_count == prefix_hits->size())
661    return false;
662
663  // Find the matching full-hash results.  |full_browse_hashes_| are from the
664  // database, |pending_browse_hashes_| are from GetHash requests between
665  // updates.
666  std::sort(prefix_hits->begin(), prefix_hits->end());
667
668  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
669                               full_hits, last_update);
670  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
671                               full_hits, last_update);
672  return true;
673}
674
675bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
676    const std::vector<GURL>& urls,
677    std::vector<SBPrefix>* prefix_hits) {
678  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
679
680  // Ignore this check when download checking is not enabled.
681  if (!download_store_.get())
682    return false;
683
684  std::vector<SBPrefix> prefixes;
685  GetDownloadUrlPrefixes(urls, &prefixes);
686  return MatchAddPrefixes(download_store_.get(),
687                          safe_browsing_util::BINURL % 2,
688                          prefixes,
689                          prefix_hits);
690}
691
692bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
693    const SBPrefix& prefix) {
694  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
695
696  // Ignore this check when download store is not available.
697  if (!download_store_.get())
698    return false;
699
700  std::vector<SBPrefix> prefix_hits;
701  return MatchAddPrefixes(download_store_.get(),
702                          safe_browsing_util::BINHASH % 2,
703                          std::vector<SBPrefix>(1, prefix),
704                          &prefix_hits);
705}
706
707bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
708  // This method is theoretically thread-safe but we expect all calls to
709  // originate from the IO thread.
710  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
711  std::vector<SBFullHash> full_hashes;
712  BrowseFullHashesToCheck(url, true, &full_hashes);
713  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
714}
715
716bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
717  std::vector<SBFullHash> full_hashes;
718  BrowseFullHashesToCheck(url, true, &full_hashes);
719  return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
720}
721
722bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
723    const std::vector<SBPrefix>& prefixes,
724    std::vector<SBPrefix>* prefix_hits) {
725  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
726  if (!extension_blacklist_store_)
727    return false;
728
729  return MatchAddPrefixes(extension_blacklist_store_.get(),
730                          safe_browsing_util::EXTENSIONBLACKLIST % 2,
731                          prefixes,
732                          prefix_hits);
733}
734
735bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
736    const GURL& url) {
737  SBFullHash full_hash;
738  std::string host;
739  std::string path;
740  std::string query;
741  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
742  std::string url_to_check = host + path;
743  if (!query.empty())
744    url_to_check +=  "?" + query;
745  crypto::SHA256HashString(url_to_check, &full_hash, sizeof(full_hash));
746
747  // This function can be called on any thread, so lock against any changes
748  base::AutoLock locked(lookup_lock_);
749
750  // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
751  // from disk, or the first update populates it.  Bail out without a hit if
752  // not yet available.
753  if (!side_effect_free_whitelist_prefix_set_.get())
754    return false;
755
756  return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix);
757}
758
759bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
760    const std::string& str) {
761  SBFullHash hash;
762  crypto::SHA256HashString(str, &hash, sizeof(hash));
763  std::vector<SBFullHash> hashes;
764  hashes.push_back(hash);
765  return ContainsWhitelistedHashes(download_whitelist_, hashes);
766}
767
768bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
769    const SBWhitelist& whitelist,
770    const std::vector<SBFullHash>& hashes) {
771  base::AutoLock l(lookup_lock_);
772  if (whitelist.second)
773    return true;
774  for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
775       it != hashes.end(); ++it) {
776    if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it))
777      return true;
778  }
779  return false;
780}
781
782// Helper to insert entries for all of the prefixes or full hashes in
783// |entry| into the store.
784void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
785                                        const SBEntry* entry, int list_id) {
786  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
787
788  SafeBrowsingStore* store = GetStore(list_id);
789  if (!store) return;
790
791  STATS_COUNTER("SB.HostInsert", 1);
792  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
793  const int count = entry->prefix_count();
794
795  DCHECK(!entry->IsSub());
796  if (!count) {
797    // No prefixes, use host instead.
798    STATS_COUNTER("SB.PrefixAdd", 1);
799    store->WriteAddPrefix(encoded_chunk_id, host);
800  } else if (entry->IsPrefix()) {
801    // Prefixes only.
802    for (int i = 0; i < count; i++) {
803      const SBPrefix prefix = entry->PrefixAt(i);
804      STATS_COUNTER("SB.PrefixAdd", 1);
805      store->WriteAddPrefix(encoded_chunk_id, prefix);
806    }
807  } else {
808    // Prefixes and hashes.
809    const base::Time receive_time = base::Time::Now();
810    for (int i = 0; i < count; ++i) {
811      const SBFullHash full_hash = entry->FullHashAt(i);
812      const SBPrefix prefix = full_hash.prefix;
813
814      STATS_COUNTER("SB.PrefixAdd", 1);
815      store->WriteAddPrefix(encoded_chunk_id, prefix);
816
817      STATS_COUNTER("SB.PrefixAddFull", 1);
818      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
819    }
820  }
821}
822
823// Helper to iterate over all the entries in the hosts in |chunks| and
824// add them to the store.
825void SafeBrowsingDatabaseNew::InsertAddChunks(
826    const safe_browsing_util::ListType list_id,
827    const SBChunkList& chunks) {
828  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
829
830  SafeBrowsingStore* store = GetStore(list_id);
831  if (!store) return;
832
833  for (SBChunkList::const_iterator citer = chunks.begin();
834       citer != chunks.end(); ++citer) {
835    const int chunk_id = citer->chunk_number;
836
837    // The server can give us a chunk that we already have because
838    // it's part of a range.  Don't add it again.
839    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
840    if (store->CheckAddChunk(encoded_chunk_id))
841      continue;
842
843    store->SetAddChunk(encoded_chunk_id);
844    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
845         hiter != citer->hosts.end(); ++hiter) {
846      // NOTE: Could pass |encoded_chunk_id|, but then inserting add
847      // chunks would look different from inserting sub chunks.
848      InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
849    }
850  }
851}
852
853// Helper to insert entries for all of the prefixes or full hashes in
854// |entry| into the store.
855void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
856                                        const SBEntry* entry, int list_id) {
857  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
858
859  SafeBrowsingStore* store = GetStore(list_id);
860  if (!store) return;
861
862  STATS_COUNTER("SB.HostDelete", 1);
863  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
864  const int count = entry->prefix_count();
865
866  DCHECK(entry->IsSub());
867  if (!count) {
868    // No prefixes, use host instead.
869    STATS_COUNTER("SB.PrefixSub", 1);
870    const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
871    store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
872  } else if (entry->IsPrefix()) {
873    // Prefixes only.
874    for (int i = 0; i < count; i++) {
875      const SBPrefix prefix = entry->PrefixAt(i);
876      const int add_chunk_id =
877          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
878
879      STATS_COUNTER("SB.PrefixSub", 1);
880      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
881    }
882  } else {
883    // Prefixes and hashes.
884    for (int i = 0; i < count; ++i) {
885      const SBFullHash full_hash = entry->FullHashAt(i);
886      const int add_chunk_id =
887          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
888
889      STATS_COUNTER("SB.PrefixSub", 1);
890      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
891
892      STATS_COUNTER("SB.PrefixSubFull", 1);
893      store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
894    }
895  }
896}
897
898// Helper to iterate over all the entries in the hosts in |chunks| and
899// add them to the store.
900void SafeBrowsingDatabaseNew::InsertSubChunks(
901    safe_browsing_util::ListType list_id,
902    const SBChunkList& chunks) {
903  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
904
905  SafeBrowsingStore* store = GetStore(list_id);
906  if (!store) return;
907
908  for (SBChunkList::const_iterator citer = chunks.begin();
909       citer != chunks.end(); ++citer) {
910    const int chunk_id = citer->chunk_number;
911
912    // The server can give us a chunk that we already have because
913    // it's part of a range.  Don't add it again.
914    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
915    if (store->CheckSubChunk(encoded_chunk_id))
916      continue;
917
918    store->SetSubChunk(encoded_chunk_id);
919    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
920         hiter != citer->hosts.end(); ++hiter) {
921      InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
922    }
923  }
924}
925
926void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
927                                           const SBChunkList& chunks) {
928  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
929
930  if (corruption_detected_ || chunks.empty())
931    return;
932
933  const base::TimeTicks before = base::TimeTicks::Now();
934
935  const safe_browsing_util::ListType list_id =
936      safe_browsing_util::GetListId(list_name);
937  DVLOG(2) << list_name << ": " << list_id;
938
939  SafeBrowsingStore* store = GetStore(list_id);
940  if (!store) return;
941
942  change_detected_ = true;
943
944  store->BeginChunk();
945  if (chunks.front().is_add) {
946    InsertAddChunks(list_id, chunks);
947  } else {
948    InsertSubChunks(list_id, chunks);
949  }
950  store->FinishChunk();
951
952  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
953}
954
955void SafeBrowsingDatabaseNew::DeleteChunks(
956    const std::vector<SBChunkDelete>& chunk_deletes) {
957  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
958
959  if (corruption_detected_ || chunk_deletes.empty())
960    return;
961
962  const std::string& list_name = chunk_deletes.front().list_name;
963  const safe_browsing_util::ListType list_id =
964      safe_browsing_util::GetListId(list_name);
965
966  SafeBrowsingStore* store = GetStore(list_id);
967  if (!store) return;
968
969  change_detected_ = true;
970
971  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
972    std::vector<int> chunk_numbers;
973    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
974    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
975      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
976      if (chunk_deletes[i].is_sub_del)
977        store->DeleteSubChunk(encoded_chunk_id);
978      else
979        store->DeleteAddChunk(encoded_chunk_id);
980    }
981  }
982}
983
984void SafeBrowsingDatabaseNew::CacheHashResults(
985    const std::vector<SBPrefix>& prefixes,
986    const std::vector<SBFullHashResult>& full_hits) {
987  // This is called on the I/O thread, lock against updates.
988  base::AutoLock locked(lookup_lock_);
989
990  if (full_hits.empty()) {
991    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
992    return;
993  }
994
995  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
996  // Refactor to make them identical.
997  const base::Time now = base::Time::Now();
998  const size_t orig_size = pending_browse_hashes_.size();
999  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
1000       iter != full_hits.end(); ++iter) {
1001    const int list_id = safe_browsing_util::GetListId(iter->list_name);
1002    if (list_id == safe_browsing_util::MALWARE ||
1003        list_id == safe_browsing_util::PHISH) {
1004      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1005      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1006      pending_browse_hashes_.push_back(add_full_hash);
1007    }
1008  }
1009
1010  // Sort new entries then merge with the previously-sorted entries.
1011  std::vector<SBAddFullHash>::iterator
1012      orig_end = pending_browse_hashes_.begin() + orig_size;
1013  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1014  std::inplace_merge(pending_browse_hashes_.begin(),
1015                     orig_end, pending_browse_hashes_.end(),
1016                     SBAddFullHashPrefixLess);
1017}
1018
1019bool SafeBrowsingDatabaseNew::UpdateStarted(
1020    std::vector<SBListChunkRanges>* lists) {
1021  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1022  DCHECK(lists);
1023
1024  // If |BeginUpdate()| fails, reset the database.
1025  if (!browse_store_->BeginUpdate()) {
1026    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1027    HandleCorruptDatabase();
1028    return false;
1029  }
1030
1031  if (download_store_.get() && !download_store_->BeginUpdate()) {
1032    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1033    HandleCorruptDatabase();
1034    return false;
1035  }
1036
1037  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1038    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1039    HandleCorruptDatabase();
1040    return false;
1041  }
1042
1043  if (download_whitelist_store_.get() &&
1044      !download_whitelist_store_->BeginUpdate()) {
1045    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1046    HandleCorruptDatabase();
1047    return false;
1048  }
1049
1050  if (extension_blacklist_store_ &&
1051      !extension_blacklist_store_->BeginUpdate()) {
1052    RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1053    HandleCorruptDatabase();
1054    return false;
1055  }
1056
1057  if (side_effect_free_whitelist_store_ &&
1058      !side_effect_free_whitelist_store_->BeginUpdate()) {
1059    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1060    HandleCorruptDatabase();
1061    return false;
1062  }
1063
1064  std::vector<std::string> browse_listnames;
1065  browse_listnames.push_back(safe_browsing_util::kMalwareList);
1066  browse_listnames.push_back(safe_browsing_util::kPhishingList);
1067  UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);
1068
1069  if (download_store_.get()) {
1070    // This store used to contain kBinHashList in addition to
1071    // kBinUrlList.  Strip the stale data before generating the chunk
1072    // ranges to request.  UpdateChunkRanges() will traverse the chunk
1073    // list, so this is very cheap if there are no kBinHashList chunks.
1074    const int listid =
1075        safe_browsing_util::GetListId(safe_browsing_util::kBinHashList);
1076    DeleteChunksFromStore(download_store_.get(), listid);
1077
1078    // The above marks the chunks for deletion, but they are not
1079    // actually deleted until the database is rewritten.  The
1080    // following code removes the kBinHashList part of the request
1081    // before continuing so that UpdateChunkRanges() doesn't break.
1082    std::vector<std::string> download_listnames;
1083    download_listnames.push_back(safe_browsing_util::kBinUrlList);
1084    download_listnames.push_back(safe_browsing_util::kBinHashList);
1085    UpdateChunkRanges(download_store_.get(), download_listnames, lists);
1086    DCHECK_EQ(lists->back().name,
1087              std::string(safe_browsing_util::kBinHashList));
1088    lists->pop_back();
1089
1090    // TODO(shess): This problem could also be handled in
1091    // BeginUpdate() by detecting the chunks to delete and rewriting
1092    // the database before it's used.  When I implemented that, it
1093    // felt brittle, it might be easier to just wait for some future
1094    // format change.
1095  }
1096
1097  if (csd_whitelist_store_.get()) {
1098    std::vector<std::string> csd_whitelist_listnames;
1099    csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
1100    UpdateChunkRanges(csd_whitelist_store_.get(),
1101                      csd_whitelist_listnames, lists);
1102  }
1103
1104  if (download_whitelist_store_.get()) {
1105    std::vector<std::string> download_whitelist_listnames;
1106    download_whitelist_listnames.push_back(
1107        safe_browsing_util::kDownloadWhiteList);
1108    UpdateChunkRanges(download_whitelist_store_.get(),
1109                      download_whitelist_listnames, lists);
1110  }
1111
1112  if (extension_blacklist_store_) {
1113    UpdateChunkRanges(
1114        extension_blacklist_store_.get(),
1115        std::vector<std::string>(1, safe_browsing_util::kExtensionBlacklist),
1116        lists);
1117  }
1118
1119  if (side_effect_free_whitelist_store_) {
1120    UpdateChunkRanges(
1121        side_effect_free_whitelist_store_.get(),
1122        std::vector<std::string>(
1123            1, safe_browsing_util::kSideEffectFreeWhitelist),
1124        lists);
1125  }
1126
1127  corruption_detected_ = false;
1128  change_detected_ = false;
1129  return true;
1130}
1131
1132void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1133  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1134
1135  // The update may have failed due to corrupt storage (for instance,
1136  // an excessive number of invalid add_chunks and sub_chunks).
1137  // Double-check that the databases are valid.
1138  // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1139  // sections would allow throwing a corruption error in
1140  // UpdateStarted().
1141  if (!update_succeeded) {
1142    if (!browse_store_->CheckValidity())
1143      DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1144
1145    if (download_store_.get() && !download_store_->CheckValidity())
1146      DLOG(ERROR) << "Safe-browsing download database corrupt.";
1147
1148    if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1149      DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1150
1151    if (download_whitelist_store_.get() &&
1152        !download_whitelist_store_->CheckValidity()) {
1153      DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1154    }
1155
1156    if (extension_blacklist_store_ &&
1157        !extension_blacklist_store_->CheckValidity()) {
1158      DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1159    }
1160
1161    if (side_effect_free_whitelist_store_ &&
1162        !side_effect_free_whitelist_store_->CheckValidity()) {
1163      DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1164                  << "corrupt.";
1165    }
1166  }
1167
1168  if (corruption_detected_)
1169    return;
1170
1171  // Unroll the transaction if there was a protocol error or if the
1172  // transaction was empty.  This will leave the prefix set, the
1173  // pending hashes, and the prefix miss cache in place.
1174  if (!update_succeeded || !change_detected_) {
1175    // Track empty updates to answer questions at http://crbug.com/72216 .
1176    if (update_succeeded && !change_detected_)
1177      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1178    browse_store_->CancelUpdate();
1179    if (download_store_.get())
1180      download_store_->CancelUpdate();
1181    if (csd_whitelist_store_.get())
1182      csd_whitelist_store_->CancelUpdate();
1183    if (download_whitelist_store_.get())
1184      download_whitelist_store_->CancelUpdate();
1185    if (extension_blacklist_store_)
1186      extension_blacklist_store_->CancelUpdate();
1187    if (side_effect_free_whitelist_store_)
1188      side_effect_free_whitelist_store_->CancelUpdate();
1189    return;
1190  }
1191
1192  if (download_store_) {
1193    int64 size_bytes = UpdateHashPrefixStore(
1194        download_filename_,
1195        download_store_.get(),
1196        FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1197    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1198                         static_cast<int>(size_bytes / 1024));
1199  }
1200
1201  UpdateBrowseStore();
1202  UpdateWhitelistStore(csd_whitelist_filename_,
1203                       csd_whitelist_store_.get(),
1204                       &csd_whitelist_);
1205  UpdateWhitelistStore(download_whitelist_filename_,
1206                       download_whitelist_store_.get(),
1207                       &download_whitelist_);
1208
1209  if (extension_blacklist_store_) {
1210    int64 size_bytes = UpdateHashPrefixStore(
1211        extension_blacklist_filename_,
1212        extension_blacklist_store_.get(),
1213        FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1214    UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1215                         static_cast<int>(size_bytes / 1024));
1216  }
1217
1218  if (side_effect_free_whitelist_store_)
1219    UpdateSideEffectFreeWhitelistStore();
1220}
1221
1222void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1223    const base::FilePath& store_filename,
1224    SafeBrowsingStore* store,
1225    SBWhitelist* whitelist) {
1226  if (!store)
1227    return;
1228
1229  // For the whitelists, we don't cache and save full hashes since all
1230  // hashes are already full.
1231  std::vector<SBAddFullHash> empty_add_hashes;
1232
1233  // Not needed for the whitelists.
1234  std::set<SBPrefix> empty_miss_cache;
1235
1236  // Note: prefixes will not be empty.  The current data store implementation
1237  // stores all full-length hashes as both full and prefix hashes.
1238  SBAddPrefixes prefixes;
1239  std::vector<SBAddFullHash> full_hashes;
1240  if (!store->FinishUpdate(empty_add_hashes, empty_miss_cache, &prefixes,
1241                           &full_hashes)) {
1242    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1243    WhitelistEverything(whitelist);
1244    return;
1245  }
1246
1247#if defined(OS_MACOSX)
1248  base::mac::SetFileBackupExclusion(store_filename);
1249#endif
1250
1251  LoadWhitelist(full_hashes, whitelist);
1252}
1253
1254int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1255    const base::FilePath& store_filename,
1256    SafeBrowsingStore* store,
1257    FailureType failure_type) {
1258  // We don't cache and save full hashes.
1259  std::vector<SBAddFullHash> empty_add_hashes;
1260
1261  // Backend lookup happens only if a prefix is in add list.
1262  std::set<SBPrefix> empty_miss_cache;
1263
1264  // These results are not used after this call. Simply ignore the
1265  // returned value after FinishUpdate(...).
1266  SBAddPrefixes add_prefixes_result;
1267  std::vector<SBAddFullHash> add_full_hashes_result;
1268
1269  if (!store->FinishUpdate(empty_add_hashes,
1270                           empty_miss_cache,
1271                           &add_prefixes_result,
1272                           &add_full_hashes_result)) {
1273    RecordFailure(failure_type);
1274  }
1275
1276#if defined(OS_MACOSX)
1277  base::mac::SetFileBackupExclusion(store_filename);
1278#endif
1279
1280  return GetFileSizeOrZero(store_filename);
1281}
1282
1283void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1284  // Copy out the pending add hashes.  Copy rather than swapping in
1285  // case |ContainsBrowseURL()| is called before the new filter is complete.
1286  std::vector<SBAddFullHash> pending_add_hashes;
1287  {
1288    base::AutoLock locked(lookup_lock_);
1289    pending_add_hashes.insert(pending_add_hashes.end(),
1290                              pending_browse_hashes_.begin(),
1291                              pending_browse_hashes_.end());
1292  }
1293
1294  // Measure the amount of IO during the filter build.
1295  base::IoCounters io_before, io_after;
1296  base::ProcessHandle handle = base::Process::Current().handle();
1297  scoped_ptr<base::ProcessMetrics> metric(
1298#if !defined(OS_MACOSX)
1299      base::ProcessMetrics::CreateProcessMetrics(handle)
1300#else
1301      // Getting stats only for the current process is enough, so NULL is fine.
1302      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1303#endif
1304  );
1305
1306  // IoCounters are currently not supported on Mac, and may not be
1307  // available for Linux, so we check the result and only show IO
1308  // stats if they are available.
1309  const bool got_counters = metric->GetIOCounters(&io_before);
1310
1311  const base::TimeTicks before = base::TimeTicks::Now();
1312
1313  SBAddPrefixes add_prefixes;
1314  std::vector<SBAddFullHash> add_full_hashes;
1315  if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
1316                                   &add_prefixes, &add_full_hashes)) {
1317    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1318    return;
1319  }
1320
1321  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1322  // could be passed directly to |PrefixSet()|, removing the need for
1323  // |prefixes|.  For now, |prefixes| is useful while debugging
1324  // things.
1325  std::vector<SBPrefix> prefixes;
1326  prefixes.reserve(add_prefixes.size());
1327  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1328       iter != add_prefixes.end(); ++iter) {
1329    prefixes.push_back(iter->prefix);
1330  }
1331
1332  std::sort(prefixes.begin(), prefixes.end());
1333  scoped_ptr<safe_browsing::PrefixSet>
1334      prefix_set(new safe_browsing::PrefixSet(prefixes));
1335
1336  // This needs to be in sorted order by prefix for efficient access.
1337  std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1338            SBAddFullHashPrefixLess);
1339
1340  // Swap in the newly built filter and cache.
1341  {
1342    base::AutoLock locked(lookup_lock_);
1343    full_browse_hashes_.swap(add_full_hashes);
1344
1345    // TODO(shess): If |CacheHashResults()| is posted between the
1346    // earlier lock and this clear, those pending hashes will be lost.
1347    // It could be fixed by only removing hashes which were collected
1348    // at the earlier point.  I believe that is fail-safe as-is (the
1349    // hash will be fetched again).
1350    pending_browse_hashes_.clear();
1351    prefix_miss_cache_.clear();
1352    browse_prefix_set_.swap(prefix_set);
1353  }
1354
1355  DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1356           << (base::TimeTicks::Now() - before).InMilliseconds()
1357           << " ms total.  prefix count: " << add_prefixes.size();
1358  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1359
1360  // Persist the prefix set to disk.  Since only this thread changes
1361  // |browse_prefix_set_|, there is no need to lock.
1362  WritePrefixSet();
1363
1364  // Gather statistics.
1365  if (got_counters && metric->GetIOCounters(&io_after)) {
1366    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1367                         static_cast<int>(io_after.ReadTransferCount -
1368                                          io_before.ReadTransferCount) / 1024);
1369    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1370                         static_cast<int>(io_after.WriteTransferCount -
1371                                          io_before.WriteTransferCount) / 1024);
1372    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1373                         static_cast<int>(io_after.ReadOperationCount -
1374                                          io_before.ReadOperationCount));
1375    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1376                         static_cast<int>(io_after.WriteOperationCount -
1377                                          io_before.WriteOperationCount));
1378  }
1379
1380  int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1381  UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1382                       static_cast<int>(file_size / 1024));
1383  file_size = GetFileSizeOrZero(browse_filename_);
1384  UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1385                       static_cast<int>(file_size / 1024));
1386
1387#if defined(OS_MACOSX)
1388  base::mac::SetFileBackupExclusion(browse_filename_);
1389#endif
1390}
1391
1392void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1393  std::vector<SBAddFullHash> empty_add_hashes;
1394  std::set<SBPrefix> empty_miss_cache;
1395  SBAddPrefixes add_prefixes;
1396  std::vector<SBAddFullHash> add_full_hashes_result;
1397
1398  if (!side_effect_free_whitelist_store_->FinishUpdate(
1399          empty_add_hashes,
1400          empty_miss_cache,
1401          &add_prefixes,
1402          &add_full_hashes_result)) {
1403    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1404    return;
1405  }
1406
1407  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1408  // could be passed directly to |PrefixSet()|, removing the need for
1409  // |prefixes|.  For now, |prefixes| is useful while debugging
1410  // things.
1411  std::vector<SBPrefix> prefixes;
1412  prefixes.reserve(add_prefixes.size());
1413  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1414       iter != add_prefixes.end(); ++iter) {
1415    prefixes.push_back(iter->prefix);
1416  }
1417
1418  std::sort(prefixes.begin(), prefixes.end());
1419  scoped_ptr<safe_browsing::PrefixSet>
1420      prefix_set(new safe_browsing::PrefixSet(prefixes));
1421
1422  // Swap in the newly built prefix set.
1423  {
1424    base::AutoLock locked(lookup_lock_);
1425    side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1426  }
1427
1428  const base::TimeTicks before = base::TimeTicks::Now();
1429  const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1430      side_effect_free_whitelist_prefix_set_filename_);
1431  DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1432           << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
1433           << " ms";
1434  UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1435                      base::TimeTicks::Now() - before);
1436
1437  if (!write_ok)
1438    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1439
1440  // Gather statistics.
1441  int64 file_size = GetFileSizeOrZero(
1442      side_effect_free_whitelist_prefix_set_filename_);
1443  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1444                       static_cast<int>(file_size / 1024));
1445  file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1446  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1447                       static_cast<int>(file_size / 1024));
1448
1449#if defined(OS_MACOSX)
1450  base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1451  base::mac::SetFileBackupExclusion(
1452      side_effect_free_whitelist_prefix_set_filename_);
1453#endif
1454}
1455
1456void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1457  // Reset the database after the current task has unwound (but only
1458  // reset once within the scope of a given task).
1459  if (!reset_factory_.HasWeakPtrs()) {
1460    RecordFailure(FAILURE_DATABASE_CORRUPT);
1461    base::MessageLoop::current()->PostTask(FROM_HERE,
1462        base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1463                   reset_factory_.GetWeakPtr()));
1464  }
1465}
1466
1467void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1468  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1469  corruption_detected_ = true;  // Stop updating the database.
1470  ResetDatabase();
1471  DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1472}
1473
1474// TODO(shess): I'm not clear why this code doesn't have any
1475// real error-handling.
1476void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1477  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1478  DCHECK(!browse_prefix_set_filename_.empty());
1479
1480  // If there is no database, the filter cannot be used.
1481  base::PlatformFileInfo db_info;
1482  if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1483    return;
1484
1485  // Cleanup any stale bloom filter (no longer used).
1486  // TODO(shess): Track failure to delete?
1487  base::FilePath bloom_filter_filename =
1488      BloomFilterForFilename(browse_filename_);
1489  base::DeleteFile(bloom_filter_filename, false);
1490
1491  const base::TimeTicks before = base::TimeTicks::Now();
1492  browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(
1493      browse_prefix_set_filename_));
1494  DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1495           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1496  UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1497
1498  if (!browse_prefix_set_.get())
1499    RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1500}
1501
1502bool SafeBrowsingDatabaseNew::Delete() {
1503  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1504
1505  const bool r1 = browse_store_->Delete();
1506  if (!r1)
1507    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1508
1509  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1510  if (!r2)
1511    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1512
1513  const bool r3 = csd_whitelist_store_.get() ?
1514      csd_whitelist_store_->Delete() : true;
1515  if (!r3)
1516    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1517
1518  const bool r4 = download_whitelist_store_.get() ?
1519      download_whitelist_store_->Delete() : true;
1520  if (!r4)
1521    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1522
1523  base::FilePath bloom_filter_filename =
1524      BloomFilterForFilename(browse_filename_);
1525  const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1526  if (!r5)
1527    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1528
1529  const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1530  if (!r6)
1531    RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1532
1533  const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1534  if (!r7)
1535    RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1536
1537  const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1538                                    false);
1539  if (!r8)
1540    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1541
1542  const bool r9 = base::DeleteFile(
1543      side_effect_free_whitelist_prefix_set_filename_,
1544      false);
1545  if (!r9)
1546    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1547
1548  return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9;
1549}
1550
1551void SafeBrowsingDatabaseNew::WritePrefixSet() {
1552  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1553
1554  if (!browse_prefix_set_.get())
1555    return;
1556
1557  const base::TimeTicks before = base::TimeTicks::Now();
1558  const bool write_ok = browse_prefix_set_->WriteFile(
1559      browse_prefix_set_filename_);
1560  DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1561           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1562  UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1563
1564  if (!write_ok)
1565    RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1566
1567#if defined(OS_MACOSX)
1568  base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
1569#endif
1570}
1571
1572void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1573  base::AutoLock locked(lookup_lock_);
1574  whitelist->second = true;
1575  whitelist->first.clear();
1576}
1577
1578void SafeBrowsingDatabaseNew::LoadWhitelist(
1579    const std::vector<SBAddFullHash>& full_hashes,
1580    SBWhitelist* whitelist) {
1581  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1582  if (full_hashes.size() > kMaxWhitelistSize) {
1583    WhitelistEverything(whitelist);
1584    return;
1585  }
1586
1587  std::vector<SBFullHash> new_whitelist;
1588  new_whitelist.reserve(full_hashes.size());
1589  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1590       it != full_hashes.end(); ++it) {
1591    new_whitelist.push_back(it->full_hash);
1592  }
1593  std::sort(new_whitelist.begin(), new_whitelist.end());
1594
1595  SBFullHash kill_switch;
1596  crypto::SHA256HashString(kWhitelistKillSwitchUrl, &kill_switch,
1597                           sizeof(kill_switch));
1598  if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1599                         kill_switch)) {
1600    // The kill switch is whitelisted hence we whitelist all URLs.
1601    WhitelistEverything(whitelist);
1602  } else {
1603    base::AutoLock locked(lookup_lock_);
1604    whitelist->second = false;
1605    whitelist->first.swap(new_whitelist);
1606  }
1607}
1608