safe_browsing_database.cc revision 58e6fbe4ee35d65e14b626c557d37565bf8ad179
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7#include <algorithm>
8#include <iterator>
9
10#include "base/bind.h"
11#include "base/file_util.h"
12#include "base/message_loop/message_loop.h"
13#include "base/metrics/histogram.h"
14#include "base/metrics/stats_counters.h"
15#include "base/process/process.h"
16#include "base/process/process_metrics.h"
17#include "base/time/time.h"
18#include "chrome/browser/safe_browsing/prefix_set.h"
19#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
20#include "content/public/browser/browser_thread.h"
21#include "crypto/sha2.h"
22#include "url/gurl.h"
23
24#if defined(OS_MACOSX)
25#include "base/mac/mac_util.h"
26#endif
27
28using content::BrowserThread;
29
30namespace {
31
// The suffixes below are appended to a base filename to name the individual
// on-disk files (see the SafeBrowsingDatabase::*Filename() helpers later in
// this file).

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit.  If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");

// The maximum staleness for a cached entry, in minutes.  Used by
// GetCachedFullHashesForBrowse() to compute the expiry cutoff.
const int kMaxStalenessMinutes = 45;

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups
// to that whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";
79
80// To save space, the incoming |chunk_id| and |list_id| are combined
81// into an |encoded_chunk_id| for storage by shifting the |list_id|
82// into the low-order bits.  These functions decode that information.
83// TODO(lzheng): It was reasonable when database is saved in sqlite, but
84// there should be better ways to save chunk_id and list_id after we use
85// SafeBrowsingStoreFile.
int GetListIdBit(const int encoded_chunk_id) {
  // The list selector lives in the lowest bit of the encoded value.
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Recovers the chunk id from |encoded_chunk_id| by shifting out the single
// list-id bit stored in the lowest position.
int DecodeChunkId(int encoded_chunk_id) {
  const int kListIdBitCount = 1;
  return encoded_chunk_id >> kListIdBitCount;
}
92int EncodeChunkId(const int chunk, const int list_id) {
93  DCHECK_NE(list_id, safe_browsing_util::INVALID);
94  return chunk << 1 | list_id % 2;
95}
96
97// Generate the set of full hashes to check for |url|.  If
98// |include_whitelist_hashes| is true we will generate additional path-prefixes
99// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
100// whitelist it should also match /foo/bar which is not the case for all the
101// other lists.  We'll also always add a pattern for the empty path.
102// TODO(shess): This function is almost the same as
103// |CompareFullHashes()| in safe_browsing_util.cc, except that code
104// does an early exit on match.  Since match should be the infrequent
105// case (phishing or malware found), consider combining this function
106// with that one.
107void BrowseFullHashesToCheck(const GURL& url,
108                             bool include_whitelist_hashes,
109                             std::vector<SBFullHash>* full_hashes) {
110  std::vector<std::string> hosts;
111  if (url.HostIsIPAddress()) {
112    hosts.push_back(url.host());
113  } else {
114    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
115  }
116
117  std::vector<std::string> paths;
118  safe_browsing_util::GeneratePathsToCheck(url, &paths);
119
120  for (size_t i = 0; i < hosts.size(); ++i) {
121    for (size_t j = 0; j < paths.size(); ++j) {
122      const std::string& path = paths[j];
123      SBFullHash full_hash;
124      crypto::SHA256HashString(hosts[i] + path, &full_hash,
125                               sizeof(full_hash));
126      full_hashes->push_back(full_hash);
127
128      // We may have /foo as path-prefix in the whitelist which should
129      // also match with /foo/bar and /foo?bar.  Hence, for every path
130      // that ends in '/' we also add the path without the slash.
131      if (include_whitelist_hashes &&
132          path.size() > 1 &&
133          path[path.size() - 1] == '/') {
134        crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
135                                 &full_hash, sizeof(full_hash));
136        full_hashes->push_back(full_hash);
137      }
138    }
139  }
140}
141
142// Get the prefixes matching the download |urls|.
143void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
144                            std::vector<SBPrefix>* prefixes) {
145  std::vector<SBFullHash> full_hashes;
146  for (size_t i = 0; i < urls.size(); ++i)
147    BrowseFullHashesToCheck(urls[i], false, &full_hashes);
148
149  for (size_t i = 0; i < full_hashes.size(); ++i)
150    prefixes->push_back(full_hashes[i].prefix);
151}
152
153// Helper function to compare addprefixes in |store| with |prefixes|.
154// The |list_bit| indicates which list (url or hash) to compare.
155//
156// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
157// the actual matching prefixes.
158bool MatchAddPrefixes(SafeBrowsingStore* store,
159                      int list_bit,
160                      const std::vector<SBPrefix>& prefixes,
161                      std::vector<SBPrefix>* prefix_hits) {
162  prefix_hits->clear();
163  bool found_match = false;
164
165  SBAddPrefixes add_prefixes;
166  store->GetAddPrefixes(&add_prefixes);
167  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
168       iter != add_prefixes.end(); ++iter) {
169    for (size_t j = 0; j < prefixes.size(); ++j) {
170      const SBPrefix& prefix = prefixes[j];
171      if (prefix == iter->prefix &&
172          GetListIdBit(iter->chunk_id) == list_bit) {
173        prefix_hits->push_back(prefix);
174        found_match = true;
175      }
176    }
177  }
178  return found_match;
179}
180
181// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
182// add them to |full_hits| if not expired.  "Not expired" is when
183// either |last_update| was recent enough, or the item has been
184// received recently enough.  Expired items are not deleted because a
185// future update may make them acceptable again.
186//
187// For efficiency reasons the code walks |prefix_hits| and
188// |full_hashes| in parallel, so they must be sorted by prefix.
189void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
190                                  const std::vector<SBAddFullHash>& full_hashes,
191                                  std::vector<SBFullHashResult>* full_hits,
192                                  base::Time last_update) {
193  const base::Time expire_time =
194      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
195
196  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
197  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
198
199  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
200    if (*piter < hiter->full_hash.prefix) {
201      ++piter;
202    } else if (hiter->full_hash.prefix < *piter) {
203      ++hiter;
204    } else {
205      if (expire_time < last_update ||
206          expire_time.ToTimeT() < hiter->received) {
207        SBFullHashResult result;
208        const int list_bit = GetListIdBit(hiter->chunk_id);
209        DCHECK(list_bit == safe_browsing_util::MALWARE ||
210               list_bit == safe_browsing_util::PHISH);
211        const safe_browsing_util::ListType list_id =
212            static_cast<safe_browsing_util::ListType>(list_bit);
213        if (!safe_browsing_util::GetListName(list_id, &result.list_name))
214          continue;
215        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
216        result.hash = hiter->full_hash;
217        full_hits->push_back(result);
218      }
219
220      // Only increment |hiter|, |piter| might have multiple hits.
221      ++hiter;
222    }
223  }
224}
225
226// This function generates a chunk range string for |chunks|. It
227// outputs one chunk range string per list and writes it to the
228// |list_ranges| vector.  We expect |list_ranges| to already be of the
229// right size.  E.g., if |chunks| contains chunks with two different
230// list ids then |list_ranges| must contain two elements.
231void GetChunkRanges(const std::vector<int>& chunks,
232                    std::vector<std::string>* list_ranges) {
233  DCHECK_GT(list_ranges->size(), 0U);
234  DCHECK_LE(list_ranges->size(), 2U);
235  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
236  for (std::vector<int>::const_iterator iter = chunks.begin();
237       iter != chunks.end(); ++iter) {
238    int mod_list_id = GetListIdBit(*iter);
239    DCHECK_GE(mod_list_id, 0);
240    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
241    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
242  }
243  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
244    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
245  }
246}
247
248// Helper function to create chunk range lists for Browse related
249// lists.
250void UpdateChunkRanges(SafeBrowsingStore* store,
251                       const std::vector<std::string>& listnames,
252                       std::vector<SBListChunkRanges>* lists) {
253  DCHECK_GT(listnames.size(), 0U);
254  DCHECK_LE(listnames.size(), 2U);
255  std::vector<int> add_chunks;
256  std::vector<int> sub_chunks;
257  store->GetAddChunks(&add_chunks);
258  store->GetSubChunks(&sub_chunks);
259
260  std::vector<std::string> adds(listnames.size());
261  std::vector<std::string> subs(listnames.size());
262  GetChunkRanges(add_chunks, &adds);
263  GetChunkRanges(sub_chunks, &subs);
264
265  for (size_t i = 0; i < listnames.size(); ++i) {
266    const std::string& listname = listnames[i];
267    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
268              static_cast<int>(i % 2));
269    DCHECK_NE(safe_browsing_util::GetListId(listname),
270              safe_browsing_util::INVALID);
271    lists->push_back(SBListChunkRanges(listname));
272    lists->back().adds.swap(adds[i]);
273    lists->back().subs.swap(subs[i]);
274  }
275}
276
277// Helper for deleting chunks left over from obsolete lists.
278void DeleteChunksFromStore(SafeBrowsingStore* store, int listid){
279  std::vector<int> add_chunks;
280  size_t adds_deleted = 0;
281  store->GetAddChunks(&add_chunks);
282  for (std::vector<int>::const_iterator iter = add_chunks.begin();
283       iter != add_chunks.end(); ++iter) {
284    if (GetListIdBit(*iter) == GetListIdBit(listid)) {
285      adds_deleted++;
286      store->DeleteAddChunk(*iter);
287    }
288  }
289  if (adds_deleted > 0)
290    UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashAddsDeleted", adds_deleted);
291
292  std::vector<int> sub_chunks;
293  size_t subs_deleted = 0;
294  store->GetSubChunks(&sub_chunks);
295  for (std::vector<int>::const_iterator iter = sub_chunks.begin();
296       iter != sub_chunks.end(); ++iter) {
297    if (GetListIdBit(*iter) == GetListIdBit(listid)) {
298      subs_deleted++;
299      store->DeleteSubChunk(*iter);
300    }
301  }
302  if (subs_deleted > 0)
303    UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted);
304}
305
306// Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
307// safe_browsing_store.h orders on both chunk-id and prefix.
308bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
309  return a.full_hash.prefix < b.full_hash.prefix;
310}
311
312// This code always checks for non-zero file size.  This helper makes
313// that less verbose.
314int64 GetFileSizeOrZero(const base::FilePath& file_path) {
315  int64 size_64;
316  if (!file_util::GetFileSize(file_path, &size_64))
317    return 0;
318  return size_64;
319}
320
321}  // namespace
322
323// The default SafeBrowsingDatabaseFactory.
324class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
325 public:
326  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
327      bool enable_download_protection,
328      bool enable_client_side_whitelist,
329      bool enable_download_whitelist,
330      bool enable_extension_blacklist,
331      bool enable_side_effect_free_whitelist) OVERRIDE {
332    return new SafeBrowsingDatabaseNew(
333        new SafeBrowsingStoreFile,
334        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
335        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
336        enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
337        enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
338        enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL);
339  }
340
341  SafeBrowsingDatabaseFactoryImpl() { }
342
343 private:
344  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
345};
346
347// static
348SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
349
350// Factory method, non-thread safe. Caller has to make sure this s called
351// on SafeBrowsing Thread.
352// TODO(shess): There's no need for a factory any longer.  Convert
353// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
354// callers just construct things directly.
355SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
356    bool enable_download_protection,
357    bool enable_client_side_whitelist,
358    bool enable_download_whitelist,
359    bool enable_extension_blacklist,
360    bool enable_side_effect_free_whitelist) {
361  if (!factory_)
362    factory_ = new SafeBrowsingDatabaseFactoryImpl();
363  return factory_->CreateSafeBrowsingDatabase(
364      enable_download_protection,
365      enable_client_side_whitelist,
366      enable_download_whitelist,
367      enable_extension_blacklist,
368      enable_side_effect_free_whitelist);
369}
370
371SafeBrowsingDatabase::~SafeBrowsingDatabase() {
372}
373
374// static
375base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
376    const base::FilePath& db_base_filename) {
377  return base::FilePath(db_base_filename.value() + kBrowseDBFile);
378}
379
380// static
381base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
382    const base::FilePath& db_base_filename) {
383  return base::FilePath(db_base_filename.value() + kDownloadDBFile);
384}
385
386// static
387base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
388    const base::FilePath& db_filename) {
389  return base::FilePath(db_filename.value() + kBloomFilterFile);
390}
391
392// static
393base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
394    const base::FilePath& db_filename) {
395  return base::FilePath(db_filename.value() + kPrefixSetFile);
396}
397
398// static
399base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
400    const base::FilePath& db_filename) {
401  return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
402}
403
404// static
405base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
406    const base::FilePath& db_filename) {
407  return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
408}
409
410// static
411base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
412    const base::FilePath& db_filename) {
413  return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
414}
415
416// static
417base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
418    const base::FilePath& db_filename) {
419  return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
420}
421
422SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
423  if (list_id == safe_browsing_util::PHISH ||
424      list_id == safe_browsing_util::MALWARE) {
425    return browse_store_.get();
426  } else if (list_id == safe_browsing_util::BINURL ||
427             list_id == safe_browsing_util::BINHASH) {
428    return download_store_.get();
429  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
430    return csd_whitelist_store_.get();
431  } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
432    return download_whitelist_store_.get();
433  } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
434    return extension_blacklist_store_.get();
435  } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
436    return side_effect_free_whitelist_store_.get();
437  }
438  return NULL;
439}
440
441// static
442void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
443  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
444                            FAILURE_DATABASE_MAX);
445}
446
447SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
448    : creation_loop_(base::MessageLoop::current()),
449      browse_store_(new SafeBrowsingStoreFile),
450      reset_factory_(this),
451      corruption_detected_(false),
452      change_detected_(false) {
453  DCHECK(browse_store_.get());
454  DCHECK(!download_store_.get());
455  DCHECK(!csd_whitelist_store_.get());
456  DCHECK(!download_whitelist_store_.get());
457  DCHECK(!extension_blacklist_store_.get());
458  DCHECK(!side_effect_free_whitelist_store_.get());
459}
460
461SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
462    SafeBrowsingStore* browse_store,
463    SafeBrowsingStore* download_store,
464    SafeBrowsingStore* csd_whitelist_store,
465    SafeBrowsingStore* download_whitelist_store,
466    SafeBrowsingStore* extension_blacklist_store,
467    SafeBrowsingStore* side_effect_free_whitelist_store)
468    : creation_loop_(base::MessageLoop::current()),
469      browse_store_(browse_store),
470      download_store_(download_store),
471      csd_whitelist_store_(csd_whitelist_store),
472      download_whitelist_store_(download_whitelist_store),
473      extension_blacklist_store_(extension_blacklist_store),
474      side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
475      reset_factory_(this),
476      corruption_detected_(false) {
477  DCHECK(browse_store_.get());
478}
479
480SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
481  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
482}
483
484void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
485  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
486  // Ensure we haven't been run before.
487  DCHECK(browse_filename_.empty());
488  DCHECK(download_filename_.empty());
489  DCHECK(csd_whitelist_filename_.empty());
490  DCHECK(download_whitelist_filename_.empty());
491  DCHECK(extension_blacklist_filename_.empty());
492  DCHECK(side_effect_free_whitelist_filename_.empty());
493
494  browse_filename_ = BrowseDBFilename(filename_base);
495  browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
496
497  browse_store_->Init(
498      browse_filename_,
499      base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
500                 base::Unretained(this)));
501  DVLOG(1) << "Init browse store: " << browse_filename_.value();
502
503  {
504    // NOTE: There is no need to grab the lock in this function, since
505    // until it returns, there are no pointers to this class on other
506    // threads.  Then again, that means there is no possibility of
507    // contention on the lock...
508    base::AutoLock locked(lookup_lock_);
509    full_browse_hashes_.clear();
510    pending_browse_hashes_.clear();
511    LoadPrefixSet();
512  }
513
514  if (download_store_.get()) {
515    download_filename_ = DownloadDBFilename(filename_base);
516    download_store_->Init(
517        download_filename_,
518        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
519                   base::Unretained(this)));
520    DVLOG(1) << "Init download store: " << download_filename_.value();
521  }
522
523  if (csd_whitelist_store_.get()) {
524    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
525    csd_whitelist_store_->Init(
526        csd_whitelist_filename_,
527        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
528                   base::Unretained(this)));
529    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
530    std::vector<SBAddFullHash> full_hashes;
531    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
532      LoadWhitelist(full_hashes, &csd_whitelist_);
533    } else {
534      WhitelistEverything(&csd_whitelist_);
535    }
536  } else {
537    WhitelistEverything(&csd_whitelist_);  // Just to be safe.
538  }
539
540  if (download_whitelist_store_.get()) {
541    download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
542    download_whitelist_store_->Init(
543        download_whitelist_filename_,
544        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
545                   base::Unretained(this)));
546    DVLOG(1) << "Init download whitelist store: "
547             << download_whitelist_filename_.value();
548    std::vector<SBAddFullHash> full_hashes;
549    if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
550      LoadWhitelist(full_hashes, &download_whitelist_);
551    } else {
552      WhitelistEverything(&download_whitelist_);
553    }
554  } else {
555    WhitelistEverything(&download_whitelist_);  // Just to be safe.
556  }
557
558  if (extension_blacklist_store_.get()) {
559    extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
560    extension_blacklist_store_->Init(
561        extension_blacklist_filename_,
562        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
563                   base::Unretained(this)));
564    DVLOG(1) << "Init extension blacklist store: "
565             << extension_blacklist_filename_.value();
566  }
567
568  if (side_effect_free_whitelist_store_.get()) {
569    side_effect_free_whitelist_filename_ =
570        SideEffectFreeWhitelistDBFilename(filename_base);
571    side_effect_free_whitelist_prefix_set_filename_ =
572        PrefixSetForFilename(side_effect_free_whitelist_filename_);
573    side_effect_free_whitelist_store_->Init(
574        side_effect_free_whitelist_filename_,
575        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
576                   base::Unretained(this)));
577    DVLOG(1) << "Init side-effect free whitelist store: "
578             << side_effect_free_whitelist_filename_.value();
579
580    // If there is no database, the filter cannot be used.
581    base::PlatformFileInfo db_info;
582    if (file_util::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
583        && db_info.size != 0) {
584      const base::TimeTicks before = base::TimeTicks::Now();
585      side_effect_free_whitelist_prefix_set_.reset(
586          safe_browsing::PrefixSet::LoadFile(
587              side_effect_free_whitelist_prefix_set_filename_));
588      DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
589               << "prefix set in "
590               << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
591      UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
592                          base::TimeTicks::Now() - before);
593      if (!side_effect_free_whitelist_prefix_set_.get())
594        RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
595    }
596  } else {
597    // Delete any files of the side-effect free sidelist that may be around
598    // from when it was previously enabled.
599    SafeBrowsingStoreFile::DeleteStore(
600        SideEffectFreeWhitelistDBFilename(filename_base));
601  }
602}
603
604bool SafeBrowsingDatabaseNew::ResetDatabase() {
605  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
606
607  // Delete files on disk.
608  // TODO(shess): Hard to see where one might want to delete without a
609  // reset.  Perhaps inline |Delete()|?
610  if (!Delete())
611    return false;
612
613  // Reset objects in memory.
614  {
615    base::AutoLock locked(lookup_lock_);
616    full_browse_hashes_.clear();
617    pending_browse_hashes_.clear();
618    prefix_miss_cache_.clear();
619    browse_prefix_set_.reset();
620    side_effect_free_whitelist_prefix_set_.reset();
621  }
622  // Wants to acquire the lock itself.
623  WhitelistEverything(&csd_whitelist_);
624  WhitelistEverything(&download_whitelist_);
625
626  return true;
627}
628
629// TODO(lzheng): Remove matching_list, it is not used anywhere.
630bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
631    const GURL& url,
632    std::string* matching_list,
633    std::vector<SBPrefix>* prefix_hits,
634    std::vector<SBFullHashResult>* full_hits,
635    base::Time last_update) {
636  // Clear the results first.
637  matching_list->clear();
638  prefix_hits->clear();
639  full_hits->clear();
640
641  std::vector<SBFullHash> full_hashes;
642  BrowseFullHashesToCheck(url, false, &full_hashes);
643  if (full_hashes.empty())
644    return false;
645
646  // This function is called on the I/O thread, prevent changes to
647  // filter and caches.
648  base::AutoLock locked(lookup_lock_);
649
650  // |browse_prefix_set_| is empty until it is either read from disk, or the
651  // first update populates it.  Bail out without a hit if not yet
652  // available.
653  if (!browse_prefix_set_.get())
654    return false;
655
656  size_t miss_count = 0;
657  for (size_t i = 0; i < full_hashes.size(); ++i) {
658    const SBPrefix prefix = full_hashes[i].prefix;
659    if (browse_prefix_set_->Exists(prefix)) {
660      prefix_hits->push_back(prefix);
661      if (prefix_miss_cache_.count(prefix) > 0)
662        ++miss_count;
663    }
664  }
665
666  // If all the prefixes are cached as 'misses', don't issue a GetHash.
667  if (miss_count == prefix_hits->size())
668    return false;
669
670  // Find the matching full-hash results.  |full_browse_hashes_| are from the
671  // database, |pending_browse_hashes_| are from GetHash requests between
672  // updates.
673  std::sort(prefix_hits->begin(), prefix_hits->end());
674
675  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
676                               full_hits, last_update);
677  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
678                               full_hits, last_update);
679  return true;
680}
681
682bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
683    const std::vector<GURL>& urls,
684    std::vector<SBPrefix>* prefix_hits) {
685  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
686
687  // Ignore this check when download checking is not enabled.
688  if (!download_store_.get())
689    return false;
690
691  std::vector<SBPrefix> prefixes;
692  GetDownloadUrlPrefixes(urls, &prefixes);
693  return MatchAddPrefixes(download_store_.get(),
694                          safe_browsing_util::BINURL % 2,
695                          prefixes,
696                          prefix_hits);
697}
698
699bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
700    const SBPrefix& prefix) {
701  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
702
703  // Ignore this check when download store is not available.
704  if (!download_store_.get())
705    return false;
706
707  std::vector<SBPrefix> prefix_hits;
708  return MatchAddPrefixes(download_store_.get(),
709                          safe_browsing_util::BINHASH % 2,
710                          std::vector<SBPrefix>(1, prefix),
711                          &prefix_hits);
712}
713
714bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
715  // This method is theoretically thread-safe but we expect all calls to
716  // originate from the IO thread.
717  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
718  std::vector<SBFullHash> full_hashes;
719  BrowseFullHashesToCheck(url, true, &full_hashes);
720  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
721}
722
723bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
724  std::vector<SBFullHash> full_hashes;
725  BrowseFullHashesToCheck(url, true, &full_hashes);
726  return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
727}
728
729bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
730    const std::vector<SBPrefix>& prefixes,
731    std::vector<SBPrefix>* prefix_hits) {
732  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
733  if (!extension_blacklist_store_)
734    return false;
735
736  return MatchAddPrefixes(extension_blacklist_store_.get(),
737                          safe_browsing_util::EXTENSIONBLACKLIST % 2,
738                          prefixes,
739                          prefix_hits);
740}
741
742bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
743    const GURL& url) {
744  SBFullHash full_hash;
745  std::string host;
746  std::string path;
747  std::string query;
748  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
749  std::string url_to_check = host + path;
750  if (!query.empty())
751    url_to_check +=  "?" + query;
752  crypto::SHA256HashString(url_to_check, &full_hash, sizeof(full_hash));
753
754  // This function can be called on any thread, so lock against any changes
755  base::AutoLock locked(lookup_lock_);
756
757  // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
758  // from disk, or the first update populates it.  Bail out without a hit if
759  // not yet available.
760  if (!side_effect_free_whitelist_prefix_set_.get())
761    return false;
762
763  return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix);
764}
765
766bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
767    const std::string& str) {
768  SBFullHash hash;
769  crypto::SHA256HashString(str, &hash, sizeof(hash));
770  std::vector<SBFullHash> hashes;
771  hashes.push_back(hash);
772  return ContainsWhitelistedHashes(download_whitelist_, hashes);
773}
774
775bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
776    const SBWhitelist& whitelist,
777    const std::vector<SBFullHash>& hashes) {
778  base::AutoLock l(lookup_lock_);
779  if (whitelist.second)
780    return true;
781  for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
782       it != hashes.end(); ++it) {
783    if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it))
784      return true;
785  }
786  return false;
787}
788
789// Helper to insert entries for all of the prefixes or full hashes in
790// |entry| into the store.
791void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
792                                        const SBEntry* entry, int list_id) {
793  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
794
795  SafeBrowsingStore* store = GetStore(list_id);
796  if (!store) return;
797
798  STATS_COUNTER("SB.HostInsert", 1);
799  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
800  const int count = entry->prefix_count();
801
802  DCHECK(!entry->IsSub());
803  if (!count) {
804    // No prefixes, use host instead.
805    STATS_COUNTER("SB.PrefixAdd", 1);
806    store->WriteAddPrefix(encoded_chunk_id, host);
807  } else if (entry->IsPrefix()) {
808    // Prefixes only.
809    for (int i = 0; i < count; i++) {
810      const SBPrefix prefix = entry->PrefixAt(i);
811      STATS_COUNTER("SB.PrefixAdd", 1);
812      store->WriteAddPrefix(encoded_chunk_id, prefix);
813    }
814  } else {
815    // Prefixes and hashes.
816    const base::Time receive_time = base::Time::Now();
817    for (int i = 0; i < count; ++i) {
818      const SBFullHash full_hash = entry->FullHashAt(i);
819      const SBPrefix prefix = full_hash.prefix;
820
821      STATS_COUNTER("SB.PrefixAdd", 1);
822      store->WriteAddPrefix(encoded_chunk_id, prefix);
823
824      STATS_COUNTER("SB.PrefixAddFull", 1);
825      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
826    }
827  }
828}
829
830// Helper to iterate over all the entries in the hosts in |chunks| and
831// add them to the store.
832void SafeBrowsingDatabaseNew::InsertAddChunks(
833    const safe_browsing_util::ListType list_id,
834    const SBChunkList& chunks) {
835  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
836
837  SafeBrowsingStore* store = GetStore(list_id);
838  if (!store) return;
839
840  for (SBChunkList::const_iterator citer = chunks.begin();
841       citer != chunks.end(); ++citer) {
842    const int chunk_id = citer->chunk_number;
843
844    // The server can give us a chunk that we already have because
845    // it's part of a range.  Don't add it again.
846    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
847    if (store->CheckAddChunk(encoded_chunk_id))
848      continue;
849
850    store->SetAddChunk(encoded_chunk_id);
851    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
852         hiter != citer->hosts.end(); ++hiter) {
853      // NOTE: Could pass |encoded_chunk_id|, but then inserting add
854      // chunks would look different from inserting sub chunks.
855      InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
856    }
857  }
858}
859
860// Helper to insert entries for all of the prefixes or full hashes in
861// |entry| into the store.
862void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
863                                        const SBEntry* entry, int list_id) {
864  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
865
866  SafeBrowsingStore* store = GetStore(list_id);
867  if (!store) return;
868
869  STATS_COUNTER("SB.HostDelete", 1);
870  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
871  const int count = entry->prefix_count();
872
873  DCHECK(entry->IsSub());
874  if (!count) {
875    // No prefixes, use host instead.
876    STATS_COUNTER("SB.PrefixSub", 1);
877    const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
878    store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
879  } else if (entry->IsPrefix()) {
880    // Prefixes only.
881    for (int i = 0; i < count; i++) {
882      const SBPrefix prefix = entry->PrefixAt(i);
883      const int add_chunk_id =
884          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
885
886      STATS_COUNTER("SB.PrefixSub", 1);
887      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
888    }
889  } else {
890    // Prefixes and hashes.
891    for (int i = 0; i < count; ++i) {
892      const SBFullHash full_hash = entry->FullHashAt(i);
893      const int add_chunk_id =
894          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
895
896      STATS_COUNTER("SB.PrefixSub", 1);
897      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
898
899      STATS_COUNTER("SB.PrefixSubFull", 1);
900      store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
901    }
902  }
903}
904
905// Helper to iterate over all the entries in the hosts in |chunks| and
906// add them to the store.
907void SafeBrowsingDatabaseNew::InsertSubChunks(
908    safe_browsing_util::ListType list_id,
909    const SBChunkList& chunks) {
910  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
911
912  SafeBrowsingStore* store = GetStore(list_id);
913  if (!store) return;
914
915  for (SBChunkList::const_iterator citer = chunks.begin();
916       citer != chunks.end(); ++citer) {
917    const int chunk_id = citer->chunk_number;
918
919    // The server can give us a chunk that we already have because
920    // it's part of a range.  Don't add it again.
921    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
922    if (store->CheckSubChunk(encoded_chunk_id))
923      continue;
924
925    store->SetSubChunk(encoded_chunk_id);
926    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
927         hiter != citer->hosts.end(); ++hiter) {
928      InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
929    }
930  }
931}
932
933void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
934                                           const SBChunkList& chunks) {
935  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
936
937  if (corruption_detected_ || chunks.empty())
938    return;
939
940  const base::TimeTicks before = base::TimeTicks::Now();
941
942  const safe_browsing_util::ListType list_id =
943      safe_browsing_util::GetListId(list_name);
944  DVLOG(2) << list_name << ": " << list_id;
945
946  SafeBrowsingStore* store = GetStore(list_id);
947  if (!store) return;
948
949  change_detected_ = true;
950
951  store->BeginChunk();
952  if (chunks.front().is_add) {
953    InsertAddChunks(list_id, chunks);
954  } else {
955    InsertSubChunks(list_id, chunks);
956  }
957  store->FinishChunk();
958
959  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
960}
961
962void SafeBrowsingDatabaseNew::DeleteChunks(
963    const std::vector<SBChunkDelete>& chunk_deletes) {
964  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
965
966  if (corruption_detected_ || chunk_deletes.empty())
967    return;
968
969  const std::string& list_name = chunk_deletes.front().list_name;
970  const safe_browsing_util::ListType list_id =
971      safe_browsing_util::GetListId(list_name);
972
973  SafeBrowsingStore* store = GetStore(list_id);
974  if (!store) return;
975
976  change_detected_ = true;
977
978  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
979    std::vector<int> chunk_numbers;
980    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
981    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
982      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
983      if (chunk_deletes[i].is_sub_del)
984        store->DeleteSubChunk(encoded_chunk_id);
985      else
986        store->DeleteAddChunk(encoded_chunk_id);
987    }
988  }
989}
990
991void SafeBrowsingDatabaseNew::CacheHashResults(
992    const std::vector<SBPrefix>& prefixes,
993    const std::vector<SBFullHashResult>& full_hits) {
994  // This is called on the I/O thread, lock against updates.
995  base::AutoLock locked(lookup_lock_);
996
997  if (full_hits.empty()) {
998    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
999    return;
1000  }
1001
1002  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
1003  // Refactor to make them identical.
1004  const base::Time now = base::Time::Now();
1005  const size_t orig_size = pending_browse_hashes_.size();
1006  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
1007       iter != full_hits.end(); ++iter) {
1008    const int list_id = safe_browsing_util::GetListId(iter->list_name);
1009    if (list_id == safe_browsing_util::MALWARE ||
1010        list_id == safe_browsing_util::PHISH) {
1011      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1012      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1013      pending_browse_hashes_.push_back(add_full_hash);
1014    }
1015  }
1016
1017  // Sort new entries then merge with the previously-sorted entries.
1018  std::vector<SBAddFullHash>::iterator
1019      orig_end = pending_browse_hashes_.begin() + orig_size;
1020  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1021  std::inplace_merge(pending_browse_hashes_.begin(),
1022                     orig_end, pending_browse_hashes_.end(),
1023                     SBAddFullHashPrefixLess);
1024}
1025
1026bool SafeBrowsingDatabaseNew::UpdateStarted(
1027    std::vector<SBListChunkRanges>* lists) {
1028  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1029  DCHECK(lists);
1030
1031  // If |BeginUpdate()| fails, reset the database.
1032  if (!browse_store_->BeginUpdate()) {
1033    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1034    HandleCorruptDatabase();
1035    return false;
1036  }
1037
1038  if (download_store_.get() && !download_store_->BeginUpdate()) {
1039    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1040    HandleCorruptDatabase();
1041    return false;
1042  }
1043
1044  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1045    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1046    HandleCorruptDatabase();
1047    return false;
1048  }
1049
1050  if (download_whitelist_store_.get() &&
1051      !download_whitelist_store_->BeginUpdate()) {
1052    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1053    HandleCorruptDatabase();
1054    return false;
1055  }
1056
1057  if (extension_blacklist_store_ &&
1058      !extension_blacklist_store_->BeginUpdate()) {
1059    RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1060    HandleCorruptDatabase();
1061    return false;
1062  }
1063
1064  if (side_effect_free_whitelist_store_ &&
1065      !side_effect_free_whitelist_store_->BeginUpdate()) {
1066    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1067    HandleCorruptDatabase();
1068    return false;
1069  }
1070
1071  std::vector<std::string> browse_listnames;
1072  browse_listnames.push_back(safe_browsing_util::kMalwareList);
1073  browse_listnames.push_back(safe_browsing_util::kPhishingList);
1074  UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);
1075
1076  if (download_store_.get()) {
1077    // This store used to contain kBinHashList in addition to
1078    // kBinUrlList.  Strip the stale data before generating the chunk
1079    // ranges to request.  UpdateChunkRanges() will traverse the chunk
1080    // list, so this is very cheap if there are no kBinHashList chunks.
1081    const int listid =
1082        safe_browsing_util::GetListId(safe_browsing_util::kBinHashList);
1083    DeleteChunksFromStore(download_store_.get(), listid);
1084
1085    // The above marks the chunks for deletion, but they are not
1086    // actually deleted until the database is rewritten.  The
1087    // following code removes the kBinHashList part of the request
1088    // before continuing so that UpdateChunkRanges() doesn't break.
1089    std::vector<std::string> download_listnames;
1090    download_listnames.push_back(safe_browsing_util::kBinUrlList);
1091    download_listnames.push_back(safe_browsing_util::kBinHashList);
1092    UpdateChunkRanges(download_store_.get(), download_listnames, lists);
1093    DCHECK_EQ(lists->back().name,
1094              std::string(safe_browsing_util::kBinHashList));
1095    lists->pop_back();
1096
1097    // TODO(shess): This problem could also be handled in
1098    // BeginUpdate() by detecting the chunks to delete and rewriting
1099    // the database before it's used.  When I implemented that, it
1100    // felt brittle, it might be easier to just wait for some future
1101    // format change.
1102  }
1103
1104  if (csd_whitelist_store_.get()) {
1105    std::vector<std::string> csd_whitelist_listnames;
1106    csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
1107    UpdateChunkRanges(csd_whitelist_store_.get(),
1108                      csd_whitelist_listnames, lists);
1109  }
1110
1111  if (download_whitelist_store_.get()) {
1112    std::vector<std::string> download_whitelist_listnames;
1113    download_whitelist_listnames.push_back(
1114        safe_browsing_util::kDownloadWhiteList);
1115    UpdateChunkRanges(download_whitelist_store_.get(),
1116                      download_whitelist_listnames, lists);
1117  }
1118
1119  if (extension_blacklist_store_) {
1120    UpdateChunkRanges(
1121        extension_blacklist_store_.get(),
1122        std::vector<std::string>(1, safe_browsing_util::kExtensionBlacklist),
1123        lists);
1124  }
1125
1126  if (side_effect_free_whitelist_store_) {
1127    UpdateChunkRanges(
1128        side_effect_free_whitelist_store_.get(),
1129        std::vector<std::string>(
1130            1, safe_browsing_util::kSideEffectFreeWhitelist),
1131        lists);
1132  }
1133
1134  corruption_detected_ = false;
1135  change_detected_ = false;
1136  return true;
1137}
1138
1139void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1140  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1141
1142  // The update may have failed due to corrupt storage (for instance,
1143  // an excessive number of invalid add_chunks and sub_chunks).
1144  // Double-check that the databases are valid.
1145  // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1146  // sections would allow throwing a corruption error in
1147  // UpdateStarted().
1148  if (!update_succeeded) {
1149    if (!browse_store_->CheckValidity())
1150      DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1151
1152    if (download_store_.get() && !download_store_->CheckValidity())
1153      DLOG(ERROR) << "Safe-browsing download database corrupt.";
1154
1155    if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1156      DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1157
1158    if (download_whitelist_store_.get() &&
1159        !download_whitelist_store_->CheckValidity()) {
1160      DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1161    }
1162
1163    if (extension_blacklist_store_ &&
1164        !extension_blacklist_store_->CheckValidity()) {
1165      DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1166    }
1167
1168    if (side_effect_free_whitelist_store_ &&
1169        !side_effect_free_whitelist_store_->CheckValidity()) {
1170      DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1171                  << "corrupt.";
1172    }
1173  }
1174
1175  if (corruption_detected_)
1176    return;
1177
1178  // Unroll the transaction if there was a protocol error or if the
1179  // transaction was empty.  This will leave the prefix set, the
1180  // pending hashes, and the prefix miss cache in place.
1181  if (!update_succeeded || !change_detected_) {
1182    // Track empty updates to answer questions at http://crbug.com/72216 .
1183    if (update_succeeded && !change_detected_)
1184      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1185    browse_store_->CancelUpdate();
1186    if (download_store_.get())
1187      download_store_->CancelUpdate();
1188    if (csd_whitelist_store_.get())
1189      csd_whitelist_store_->CancelUpdate();
1190    if (download_whitelist_store_.get())
1191      download_whitelist_store_->CancelUpdate();
1192    if (extension_blacklist_store_)
1193      extension_blacklist_store_->CancelUpdate();
1194    if (side_effect_free_whitelist_store_)
1195      side_effect_free_whitelist_store_->CancelUpdate();
1196    return;
1197  }
1198
1199  if (download_store_) {
1200    int64 size_bytes = UpdateHashPrefixStore(
1201        download_filename_,
1202        download_store_.get(),
1203        FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1204    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1205                         static_cast<int>(size_bytes / 1024));
1206  }
1207
1208  UpdateBrowseStore();
1209  UpdateWhitelistStore(csd_whitelist_filename_,
1210                       csd_whitelist_store_.get(),
1211                       &csd_whitelist_);
1212  UpdateWhitelistStore(download_whitelist_filename_,
1213                       download_whitelist_store_.get(),
1214                       &download_whitelist_);
1215
1216  if (extension_blacklist_store_) {
1217    int64 size_bytes = UpdateHashPrefixStore(
1218        extension_blacklist_filename_,
1219        extension_blacklist_store_.get(),
1220        FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1221    UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1222                         static_cast<int>(size_bytes / 1024));
1223  }
1224
1225  if (side_effect_free_whitelist_store_)
1226    UpdateSideEffectFreeWhitelistStore();
1227}
1228
1229void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1230    const base::FilePath& store_filename,
1231    SafeBrowsingStore* store,
1232    SBWhitelist* whitelist) {
1233  if (!store)
1234    return;
1235
1236  // For the whitelists, we don't cache and save full hashes since all
1237  // hashes are already full.
1238  std::vector<SBAddFullHash> empty_add_hashes;
1239
1240  // Not needed for the whitelists.
1241  std::set<SBPrefix> empty_miss_cache;
1242
1243  // Note: prefixes will not be empty.  The current data store implementation
1244  // stores all full-length hashes as both full and prefix hashes.
1245  SBAddPrefixes prefixes;
1246  std::vector<SBAddFullHash> full_hashes;
1247  if (!store->FinishUpdate(empty_add_hashes, empty_miss_cache, &prefixes,
1248                           &full_hashes)) {
1249    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1250    WhitelistEverything(whitelist);
1251    return;
1252  }
1253
1254#if defined(OS_MACOSX)
1255  base::mac::SetFileBackupExclusion(store_filename);
1256#endif
1257
1258  LoadWhitelist(full_hashes, whitelist);
1259}
1260
1261int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1262    const base::FilePath& store_filename,
1263    SafeBrowsingStore* store,
1264    FailureType failure_type) {
1265  // We don't cache and save full hashes.
1266  std::vector<SBAddFullHash> empty_add_hashes;
1267
1268  // Backend lookup happens only if a prefix is in add list.
1269  std::set<SBPrefix> empty_miss_cache;
1270
1271  // These results are not used after this call. Simply ignore the
1272  // returned value after FinishUpdate(...).
1273  SBAddPrefixes add_prefixes_result;
1274  std::vector<SBAddFullHash> add_full_hashes_result;
1275
1276  if (!store->FinishUpdate(empty_add_hashes,
1277                           empty_miss_cache,
1278                           &add_prefixes_result,
1279                           &add_full_hashes_result)) {
1280    RecordFailure(failure_type);
1281  }
1282
1283#if defined(OS_MACOSX)
1284  base::mac::SetFileBackupExclusion(store_filename);
1285#endif
1286
1287  return GetFileSizeOrZero(store_filename);
1288}
1289
1290void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1291  // Copy out the pending add hashes.  Copy rather than swapping in
1292  // case |ContainsBrowseURL()| is called before the new filter is complete.
1293  std::vector<SBAddFullHash> pending_add_hashes;
1294  {
1295    base::AutoLock locked(lookup_lock_);
1296    pending_add_hashes.insert(pending_add_hashes.end(),
1297                              pending_browse_hashes_.begin(),
1298                              pending_browse_hashes_.end());
1299  }
1300
1301  // Measure the amount of IO during the filter build.
1302  base::IoCounters io_before, io_after;
1303  base::ProcessHandle handle = base::Process::Current().handle();
1304  scoped_ptr<base::ProcessMetrics> metric(
1305#if !defined(OS_MACOSX)
1306      base::ProcessMetrics::CreateProcessMetrics(handle)
1307#else
1308      // Getting stats only for the current process is enough, so NULL is fine.
1309      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1310#endif
1311  );
1312
1313  // IoCounters are currently not supported on Mac, and may not be
1314  // available for Linux, so we check the result and only show IO
1315  // stats if they are available.
1316  const bool got_counters = metric->GetIOCounters(&io_before);
1317
1318  const base::TimeTicks before = base::TimeTicks::Now();
1319
1320  SBAddPrefixes add_prefixes;
1321  std::vector<SBAddFullHash> add_full_hashes;
1322  if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
1323                                   &add_prefixes, &add_full_hashes)) {
1324    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1325    return;
1326  }
1327
1328  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1329  // could be passed directly to |PrefixSet()|, removing the need for
1330  // |prefixes|.  For now, |prefixes| is useful while debugging
1331  // things.
1332  std::vector<SBPrefix> prefixes;
1333  prefixes.reserve(add_prefixes.size());
1334  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1335       iter != add_prefixes.end(); ++iter) {
1336    prefixes.push_back(iter->prefix);
1337  }
1338
1339  std::sort(prefixes.begin(), prefixes.end());
1340  scoped_ptr<safe_browsing::PrefixSet>
1341      prefix_set(new safe_browsing::PrefixSet(prefixes));
1342
1343  // This needs to be in sorted order by prefix for efficient access.
1344  std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1345            SBAddFullHashPrefixLess);
1346
1347  // Swap in the newly built filter and cache.
1348  {
1349    base::AutoLock locked(lookup_lock_);
1350    full_browse_hashes_.swap(add_full_hashes);
1351
1352    // TODO(shess): If |CacheHashResults()| is posted between the
1353    // earlier lock and this clear, those pending hashes will be lost.
1354    // It could be fixed by only removing hashes which were collected
1355    // at the earlier point.  I believe that is fail-safe as-is (the
1356    // hash will be fetched again).
1357    pending_browse_hashes_.clear();
1358    prefix_miss_cache_.clear();
1359    browse_prefix_set_.swap(prefix_set);
1360  }
1361
1362  DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1363           << (base::TimeTicks::Now() - before).InMilliseconds()
1364           << " ms total.  prefix count: " << add_prefixes.size();
1365  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1366
1367  // Persist the prefix set to disk.  Since only this thread changes
1368  // |browse_prefix_set_|, there is no need to lock.
1369  WritePrefixSet();
1370
1371  // Gather statistics.
1372  if (got_counters && metric->GetIOCounters(&io_after)) {
1373    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1374                         static_cast<int>(io_after.ReadTransferCount -
1375                                          io_before.ReadTransferCount) / 1024);
1376    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1377                         static_cast<int>(io_after.WriteTransferCount -
1378                                          io_before.WriteTransferCount) / 1024);
1379    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1380                         static_cast<int>(io_after.ReadOperationCount -
1381                                          io_before.ReadOperationCount));
1382    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1383                         static_cast<int>(io_after.WriteOperationCount -
1384                                          io_before.WriteOperationCount));
1385  }
1386
1387  int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1388  UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1389                       static_cast<int>(file_size / 1024));
1390  file_size = GetFileSizeOrZero(browse_filename_);
1391  UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1392                       static_cast<int>(file_size / 1024));
1393
1394#if defined(OS_MACOSX)
1395  base::mac::SetFileBackupExclusion(browse_filename_);
1396#endif
1397}
1398
1399void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1400  std::vector<SBAddFullHash> empty_add_hashes;
1401  std::set<SBPrefix> empty_miss_cache;
1402  SBAddPrefixes add_prefixes;
1403  std::vector<SBAddFullHash> add_full_hashes_result;
1404
1405  if (!side_effect_free_whitelist_store_->FinishUpdate(
1406          empty_add_hashes,
1407          empty_miss_cache,
1408          &add_prefixes,
1409          &add_full_hashes_result)) {
1410    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1411    return;
1412  }
1413
1414  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1415  // could be passed directly to |PrefixSet()|, removing the need for
1416  // |prefixes|.  For now, |prefixes| is useful while debugging
1417  // things.
1418  std::vector<SBPrefix> prefixes;
1419  prefixes.reserve(add_prefixes.size());
1420  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1421       iter != add_prefixes.end(); ++iter) {
1422    prefixes.push_back(iter->prefix);
1423  }
1424
1425  std::sort(prefixes.begin(), prefixes.end());
1426  scoped_ptr<safe_browsing::PrefixSet>
1427      prefix_set(new safe_browsing::PrefixSet(prefixes));
1428
1429  // Swap in the newly built prefix set.
1430  {
1431    base::AutoLock locked(lookup_lock_);
1432    side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1433  }
1434
1435  const base::TimeTicks before = base::TimeTicks::Now();
1436  const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1437      side_effect_free_whitelist_prefix_set_filename_);
1438  DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1439           << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
1440           << " ms";
1441  UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1442                      base::TimeTicks::Now() - before);
1443
1444  if (!write_ok)
1445    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1446
1447  // Gather statistics.
1448  int64 file_size = GetFileSizeOrZero(
1449      side_effect_free_whitelist_prefix_set_filename_);
1450  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1451                       static_cast<int>(file_size / 1024));
1452  file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1453  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1454                       static_cast<int>(file_size / 1024));
1455
1456#if defined(OS_MACOSX)
1457  base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1458  base::mac::SetFileBackupExclusion(
1459      side_effect_free_whitelist_prefix_set_filename_);
1460#endif
1461}
1462
1463void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1464  // Reset the database after the current task has unwound (but only
1465  // reset once within the scope of a given task).
1466  if (!reset_factory_.HasWeakPtrs()) {
1467    RecordFailure(FAILURE_DATABASE_CORRUPT);
1468    base::MessageLoop::current()->PostTask(FROM_HERE,
1469        base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1470                   reset_factory_.GetWeakPtr()));
1471  }
1472}
1473
1474void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1475  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1476  corruption_detected_ = true;  // Stop updating the database.
1477  ResetDatabase();
1478  DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1479}
1480
1481// TODO(shess): I'm not clear why this code doesn't have any
1482// real error-handling.
1483void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1484  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1485  DCHECK(!browse_prefix_set_filename_.empty());
1486
1487  // If there is no database, the filter cannot be used.
1488  base::PlatformFileInfo db_info;
1489  if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1490    return;
1491
1492  // Cleanup any stale bloom filter (no longer used).
1493  // TODO(shess): Track failure to delete?
1494  base::FilePath bloom_filter_filename =
1495      BloomFilterForFilename(browse_filename_);
1496  base::DeleteFile(bloom_filter_filename, false);
1497
1498  const base::TimeTicks before = base::TimeTicks::Now();
1499  browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(
1500      browse_prefix_set_filename_));
1501  DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1502           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1503  UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1504
1505  if (!browse_prefix_set_.get())
1506    RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1507}
1508
1509bool SafeBrowsingDatabaseNew::Delete() {
1510  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1511
1512  const bool r1 = browse_store_->Delete();
1513  if (!r1)
1514    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1515
1516  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1517  if (!r2)
1518    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1519
1520  const bool r3 = csd_whitelist_store_.get() ?
1521      csd_whitelist_store_->Delete() : true;
1522  if (!r3)
1523    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1524
1525  const bool r4 = download_whitelist_store_.get() ?
1526      download_whitelist_store_->Delete() : true;
1527  if (!r4)
1528    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1529
1530  base::FilePath bloom_filter_filename =
1531      BloomFilterForFilename(browse_filename_);
1532  const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1533  if (!r5)
1534    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1535
1536  const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1537  if (!r6)
1538    RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1539
1540  const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1541  if (!r7)
1542    RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1543
1544  const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1545                                    false);
1546  if (!r8)
1547    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1548
1549  const bool r9 = base::DeleteFile(
1550      side_effect_free_whitelist_prefix_set_filename_,
1551      false);
1552  if (!r9)
1553    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1554
1555  return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9;
1556}
1557
1558void SafeBrowsingDatabaseNew::WritePrefixSet() {
1559  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1560
1561  if (!browse_prefix_set_.get())
1562    return;
1563
1564  const base::TimeTicks before = base::TimeTicks::Now();
1565  const bool write_ok = browse_prefix_set_->WriteFile(
1566      browse_prefix_set_filename_);
1567  DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1568           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1569  UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1570
1571  if (!write_ok)
1572    RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1573
1574#if defined(OS_MACOSX)
1575  base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
1576#endif
1577}
1578
1579void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1580  base::AutoLock locked(lookup_lock_);
1581  whitelist->second = true;
1582  whitelist->first.clear();
1583}
1584
1585void SafeBrowsingDatabaseNew::LoadWhitelist(
1586    const std::vector<SBAddFullHash>& full_hashes,
1587    SBWhitelist* whitelist) {
1588  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1589  if (full_hashes.size() > kMaxWhitelistSize) {
1590    WhitelistEverything(whitelist);
1591    return;
1592  }
1593
1594  std::vector<SBFullHash> new_whitelist;
1595  new_whitelist.reserve(full_hashes.size());
1596  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1597       it != full_hashes.end(); ++it) {
1598    new_whitelist.push_back(it->full_hash);
1599  }
1600  std::sort(new_whitelist.begin(), new_whitelist.end());
1601
1602  SBFullHash kill_switch;
1603  crypto::SHA256HashString(kWhitelistKillSwitchUrl, &kill_switch,
1604                           sizeof(kill_switch));
1605  if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1606                         kill_switch)) {
1607    // The kill switch is whitelisted hence we whitelist all URLs.
1608    WhitelistEverything(whitelist);
1609  } else {
1610    base::AutoLock locked(lookup_lock_);
1611    whitelist->second = false;
1612    whitelist->first.swap(new_whitelist);
1613  }
1614}
1615
1616bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1617  SBFullHash malware_kill_switch;
1618  crypto::SHA256HashString(kMalwareIPKillSwitchUrl, &malware_kill_switch,
1619                           sizeof(malware_kill_switch));
1620  std::vector<SBFullHash> full_hashes;
1621  full_hashes.push_back(malware_kill_switch);
1622  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1623}
1624