safe_browsing_database.cc revision 0f1bc08d4cfcc34181b0b5cbf065c40f687bf740
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7#include <algorithm>
8#include <iterator>
9
10#include "base/bind.h"
11#include "base/file_util.h"
12#include "base/message_loop/message_loop.h"
13#include "base/metrics/histogram.h"
14#include "base/metrics/stats_counters.h"
15#include "base/process/process.h"
16#include "base/process/process_metrics.h"
17#include "base/sha1.h"
18#include "base/strings/string_number_conversions.h"
19#include "base/strings/stringprintf.h"
20#include "base/time/time.h"
21#include "chrome/browser/safe_browsing/prefix_set.h"
22#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23#include "content/public/browser/browser_thread.h"
24#include "crypto/sha2.h"
25#include "net/base/net_util.h"
26#include "url/gurl.h"
27
28#if defined(OS_MACOSX)
29#include "base/mac/mac_util.h"
30#endif
31
32using content::BrowserThread;
33
34namespace {
35
// The suffixes below are concatenated directly onto a database filename by
// the SafeBrowsingDatabase::*Filename() helpers in this file, which is why
// each one begins with a space.

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for the csd malware IP blacklist store.
const base::FilePath::CharType kIPBlacklistDBFile[] =
    FILE_PATH_LITERAL(" IP Blacklist");

// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit.  If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
68
// The maximum staleness for a cached entry, in minutes.
const int kMaxStalenessMinutes = 45;

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups
// to that whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// NOTE(review): presumably the largest and smallest IP prefix sizes (in
// bits) accepted for IP blacklist entries.  The code that uses these two
// constants is outside this part of the file; confirm there.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
90
// For compact storage a |chunk_id| and its |list_id| are packed into one
// |encoded_chunk_id|: the chunk id sits in the upper bits and the parity
// of the list id in the lowest bit.  The helpers below pack and unpack
// that value.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.
//
// Returns the list-id bit (0 or 1) carried by |encoded_chunk_id|.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Returns the chunk id stored in |encoded_chunk_id| by shifting out the
// low-order list-id bit.
int DecodeChunkId(int encoded_chunk_id) {
  const int kListIdBitCount = 1;
  return encoded_chunk_id >> kListIdBitCount;
}
103int EncodeChunkId(const int chunk, const int list_id) {
104  DCHECK_NE(list_id, safe_browsing_util::INVALID);
105  return chunk << 1 | list_id % 2;
106}
107
108// Generate the set of full hashes to check for |url|.  If
109// |include_whitelist_hashes| is true we will generate additional path-prefixes
110// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
111// whitelist it should also match /foo/bar which is not the case for all the
112// other lists.  We'll also always add a pattern for the empty path.
113// TODO(shess): This function is almost the same as
114// |CompareFullHashes()| in safe_browsing_util.cc, except that code
115// does an early exit on match.  Since match should be the infrequent
116// case (phishing or malware found), consider combining this function
117// with that one.
118void BrowseFullHashesToCheck(const GURL& url,
119                             bool include_whitelist_hashes,
120                             std::vector<SBFullHash>* full_hashes) {
121  std::vector<std::string> hosts;
122  if (url.HostIsIPAddress()) {
123    hosts.push_back(url.host());
124  } else {
125    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
126  }
127
128  std::vector<std::string> paths;
129  safe_browsing_util::GeneratePathsToCheck(url, &paths);
130
131  for (size_t i = 0; i < hosts.size(); ++i) {
132    for (size_t j = 0; j < paths.size(); ++j) {
133      const std::string& path = paths[j];
134      SBFullHash full_hash;
135      crypto::SHA256HashString(hosts[i] + path, &full_hash,
136                               sizeof(full_hash));
137      full_hashes->push_back(full_hash);
138
139      // We may have /foo as path-prefix in the whitelist which should
140      // also match with /foo/bar and /foo?bar.  Hence, for every path
141      // that ends in '/' we also add the path without the slash.
142      if (include_whitelist_hashes &&
143          path.size() > 1 &&
144          path[path.size() - 1] == '/') {
145        crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
146                                 &full_hash, sizeof(full_hash));
147        full_hashes->push_back(full_hash);
148      }
149    }
150  }
151}
152
153// Get the prefixes matching the download |urls|.
154void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
155                            std::vector<SBPrefix>* prefixes) {
156  std::vector<SBFullHash> full_hashes;
157  for (size_t i = 0; i < urls.size(); ++i)
158    BrowseFullHashesToCheck(urls[i], false, &full_hashes);
159
160  for (size_t i = 0; i < full_hashes.size(); ++i)
161    prefixes->push_back(full_hashes[i].prefix);
162}
163
164// Helper function to compare addprefixes in |store| with |prefixes|.
165// The |list_bit| indicates which list (url or hash) to compare.
166//
167// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
168// the actual matching prefixes.
169bool MatchAddPrefixes(SafeBrowsingStore* store,
170                      int list_bit,
171                      const std::vector<SBPrefix>& prefixes,
172                      std::vector<SBPrefix>* prefix_hits) {
173  prefix_hits->clear();
174  bool found_match = false;
175
176  SBAddPrefixes add_prefixes;
177  store->GetAddPrefixes(&add_prefixes);
178  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
179       iter != add_prefixes.end(); ++iter) {
180    for (size_t j = 0; j < prefixes.size(); ++j) {
181      const SBPrefix& prefix = prefixes[j];
182      if (prefix == iter->prefix &&
183          GetListIdBit(iter->chunk_id) == list_bit) {
184        prefix_hits->push_back(prefix);
185        found_match = true;
186      }
187    }
188  }
189  return found_match;
190}
191
192// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
193// add them to |full_hits| if not expired.  "Not expired" is when
194// either |last_update| was recent enough, or the item has been
195// received recently enough.  Expired items are not deleted because a
196// future update may make them acceptable again.
197//
198// For efficiency reasons the code walks |prefix_hits| and
199// |full_hashes| in parallel, so they must be sorted by prefix.
200void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
201                                  const std::vector<SBAddFullHash>& full_hashes,
202                                  std::vector<SBFullHashResult>* full_hits,
203                                  base::Time last_update) {
204  const base::Time expire_time =
205      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
206
207  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
208  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
209
210  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
211    if (*piter < hiter->full_hash.prefix) {
212      ++piter;
213    } else if (hiter->full_hash.prefix < *piter) {
214      ++hiter;
215    } else {
216      if (expire_time < last_update ||
217          expire_time.ToTimeT() < hiter->received) {
218        SBFullHashResult result;
219        const int list_bit = GetListIdBit(hiter->chunk_id);
220        DCHECK(list_bit == safe_browsing_util::MALWARE ||
221               list_bit == safe_browsing_util::PHISH);
222        const safe_browsing_util::ListType list_id =
223            static_cast<safe_browsing_util::ListType>(list_bit);
224        if (!safe_browsing_util::GetListName(list_id, &result.list_name))
225          continue;
226        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
227        result.hash = hiter->full_hash;
228        full_hits->push_back(result);
229      }
230
231      // Only increment |hiter|, |piter| might have multiple hits.
232      ++hiter;
233    }
234  }
235}
236
237// This function generates a chunk range string for |chunks|. It
238// outputs one chunk range string per list and writes it to the
239// |list_ranges| vector.  We expect |list_ranges| to already be of the
240// right size.  E.g., if |chunks| contains chunks with two different
241// list ids then |list_ranges| must contain two elements.
242void GetChunkRanges(const std::vector<int>& chunks,
243                    std::vector<std::string>* list_ranges) {
244  // Since there are 2 possible list ids, there must be exactly two
245  // list ranges.  Even if the chunk data should only contain one
246  // line, this code has to somehow handle corruption.
247  DCHECK_EQ(2U, list_ranges->size());
248
249  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
250  for (std::vector<int>::const_iterator iter = chunks.begin();
251       iter != chunks.end(); ++iter) {
252    int mod_list_id = GetListIdBit(*iter);
253    DCHECK_GE(mod_list_id, 0);
254    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
255    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
256  }
257  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
258    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
259  }
260}
261
262// Helper function to create chunk range lists for Browse related
263// lists.
264void UpdateChunkRanges(SafeBrowsingStore* store,
265                       const std::vector<std::string>& listnames,
266                       std::vector<SBListChunkRanges>* lists) {
267  DCHECK_GT(listnames.size(), 0U);
268  DCHECK_LE(listnames.size(), 2U);
269  std::vector<int> add_chunks;
270  std::vector<int> sub_chunks;
271  store->GetAddChunks(&add_chunks);
272  store->GetSubChunks(&sub_chunks);
273
274  // Always decode 2 ranges, even if only the first one is expected.
275  // The loop below will only load as many into |lists| as |listnames|
276  // indicates.
277  std::vector<std::string> adds(2);
278  std::vector<std::string> subs(2);
279  GetChunkRanges(add_chunks, &adds);
280  GetChunkRanges(sub_chunks, &subs);
281
282  for (size_t i = 0; i < listnames.size(); ++i) {
283    const std::string& listname = listnames[i];
284    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
285              static_cast<int>(i % 2));
286    DCHECK_NE(safe_browsing_util::GetListId(listname),
287              safe_browsing_util::INVALID);
288    lists->push_back(SBListChunkRanges(listname));
289    lists->back().adds.swap(adds[i]);
290    lists->back().subs.swap(subs[i]);
291  }
292}
293
294// Helper for deleting chunks left over from obsolete lists.
295void DeleteChunksFromStore(SafeBrowsingStore* store, int listid){
296  std::vector<int> add_chunks;
297  size_t adds_deleted = 0;
298  store->GetAddChunks(&add_chunks);
299  for (std::vector<int>::const_iterator iter = add_chunks.begin();
300       iter != add_chunks.end(); ++iter) {
301    if (GetListIdBit(*iter) == GetListIdBit(listid)) {
302      adds_deleted++;
303      store->DeleteAddChunk(*iter);
304    }
305  }
306  if (adds_deleted > 0)
307    UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashAddsDeleted", adds_deleted);
308
309  std::vector<int> sub_chunks;
310  size_t subs_deleted = 0;
311  store->GetSubChunks(&sub_chunks);
312  for (std::vector<int>::const_iterator iter = sub_chunks.begin();
313       iter != sub_chunks.end(); ++iter) {
314    if (GetListIdBit(*iter) == GetListIdBit(listid)) {
315      subs_deleted++;
316      store->DeleteSubChunk(*iter);
317    }
318  }
319  if (subs_deleted > 0)
320    UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted);
321}
322
323// Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
324// safe_browsing_store.h orders on both chunk-id and prefix.
325bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
326  return a.full_hash.prefix < b.full_hash.prefix;
327}
328
329// This code always checks for non-zero file size.  This helper makes
330// that less verbose.
331int64 GetFileSizeOrZero(const base::FilePath& file_path) {
332  int64 size_64;
333  if (!file_util::GetFileSize(file_path, &size_64))
334    return 0;
335  return size_64;
336}
337
338}  // namespace
339
340// The default SafeBrowsingDatabaseFactory.
341class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
342 public:
343  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
344      bool enable_download_protection,
345      bool enable_client_side_whitelist,
346      bool enable_download_whitelist,
347      bool enable_extension_blacklist,
348      bool enable_side_effect_free_whitelist,
349      bool enable_ip_blacklist) OVERRIDE {
350    return new SafeBrowsingDatabaseNew(
351        new SafeBrowsingStoreFile,
352        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
353        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
354        enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
355        enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
356        enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
357        enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
358  }
359
360  SafeBrowsingDatabaseFactoryImpl() { }
361
362 private:
363  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
364};
365
// static
// Factory that Create() delegates to.  It starts out NULL; Create()
// installs a SafeBrowsingDatabaseFactoryImpl the first time it finds the
// pointer still unset.
SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
368
369// Factory method, non-thread safe. Caller has to make sure this s called
370// on SafeBrowsing Thread.
371// TODO(shess): There's no need for a factory any longer.  Convert
372// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
373// callers just construct things directly.
374SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
375    bool enable_download_protection,
376    bool enable_client_side_whitelist,
377    bool enable_download_whitelist,
378    bool enable_extension_blacklist,
379    bool enable_side_effect_free_whitelist,
380    bool enable_ip_blacklist) {
381  if (!factory_)
382    factory_ = new SafeBrowsingDatabaseFactoryImpl();
383  return factory_->CreateSafeBrowsingDatabase(
384      enable_download_protection,
385      enable_client_side_whitelist,
386      enable_download_whitelist,
387      enable_extension_blacklist,
388      enable_side_effect_free_whitelist,
389      enable_ip_blacklist);
390}
391
// Out-of-line destructor with an empty body; nothing is released here.
SafeBrowsingDatabase::~SafeBrowsingDatabase() {
}
394
395// static
396base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
397    const base::FilePath& db_base_filename) {
398  return base::FilePath(db_base_filename.value() + kBrowseDBFile);
399}
400
401// static
402base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
403    const base::FilePath& db_base_filename) {
404  return base::FilePath(db_base_filename.value() + kDownloadDBFile);
405}
406
407// static
408base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
409    const base::FilePath& db_filename) {
410  return base::FilePath(db_filename.value() + kBloomFilterFile);
411}
412
413// static
414base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
415    const base::FilePath& db_filename) {
416  return base::FilePath(db_filename.value() + kPrefixSetFile);
417}
418
419// static
420base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
421    const base::FilePath& db_filename) {
422  return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
423}
424
425// static
426base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
427    const base::FilePath& db_filename) {
428  return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
429}
430
431// static
432base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
433    const base::FilePath& db_filename) {
434  return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
435}
436
437// static
438base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
439    const base::FilePath& db_filename) {
440  return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
441}
442
443// static
444base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
445    const base::FilePath& db_filename) {
446  return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
447}
448
449SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
450  if (list_id == safe_browsing_util::PHISH ||
451      list_id == safe_browsing_util::MALWARE) {
452    return browse_store_.get();
453  } else if (list_id == safe_browsing_util::BINURL ||
454             list_id == safe_browsing_util::BINHASH) {
455    return download_store_.get();
456  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
457    return csd_whitelist_store_.get();
458  } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
459    return download_whitelist_store_.get();
460  } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
461    return extension_blacklist_store_.get();
462  } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
463    return side_effect_free_whitelist_store_.get();
464  } else if (list_id == safe_browsing_util::IPBLACKLIST) {
465    return ip_blacklist_store_.get();
466  }
467  return NULL;
468}
469
// static
// Records |failure_type| in the "SB2.DatabaseFailure" enumeration
// histogram, whose exclusive upper bound is FAILURE_DATABASE_MAX.
void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
                            FAILURE_DATABASE_MAX);
}
475
// Default constructor: creates a database that has only a file-backed
// browse store.  The message loop current at construction is remembered in
// |creation_loop_| so that other methods can DCHECK that they run on it.
SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    : creation_loop_(base::MessageLoop::current()),
      browse_store_(new SafeBrowsingStoreFile),
      reset_factory_(this),
      corruption_detected_(false),
      change_detected_(false) {
  // Only the browse store exists; every optional store must be unset.
  DCHECK(browse_store_.get());
  DCHECK(!download_store_.get());
  DCHECK(!csd_whitelist_store_.get());
  DCHECK(!download_whitelist_store_.get());
  DCHECK(!extension_blacklist_store_.get());
  DCHECK(!side_effect_free_whitelist_store_.get());
  DCHECK(!ip_blacklist_store_.get());
}
490
491SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
492    SafeBrowsingStore* browse_store,
493    SafeBrowsingStore* download_store,
494    SafeBrowsingStore* csd_whitelist_store,
495    SafeBrowsingStore* download_whitelist_store,
496    SafeBrowsingStore* extension_blacklist_store,
497    SafeBrowsingStore* side_effect_free_whitelist_store,
498    SafeBrowsingStore* ip_blacklist_store)
499    : creation_loop_(base::MessageLoop::current()),
500      browse_store_(browse_store),
501      download_store_(download_store),
502      csd_whitelist_store_(csd_whitelist_store),
503      download_whitelist_store_(download_whitelist_store),
504      extension_blacklist_store_(extension_blacklist_store),
505      side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
506      ip_blacklist_store_(ip_blacklist_store),
507      reset_factory_(this),
508      corruption_detected_(false) {
509  DCHECK(browse_store_.get());
510}
511
// The database must be destroyed on the message loop it was created on.
SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
}
515
516void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
517  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
518  // Ensure we haven't been run before.
519  DCHECK(browse_filename_.empty());
520  DCHECK(download_filename_.empty());
521  DCHECK(csd_whitelist_filename_.empty());
522  DCHECK(download_whitelist_filename_.empty());
523  DCHECK(extension_blacklist_filename_.empty());
524  DCHECK(side_effect_free_whitelist_filename_.empty());
525  DCHECK(ip_blacklist_filename_.empty());
526
527  browse_filename_ = BrowseDBFilename(filename_base);
528  browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
529
530  browse_store_->Init(
531      browse_filename_,
532      base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
533                 base::Unretained(this)));
534  DVLOG(1) << "Init browse store: " << browse_filename_.value();
535
536  {
537    // NOTE: There is no need to grab the lock in this function, since
538    // until it returns, there are no pointers to this class on other
539    // threads.  Then again, that means there is no possibility of
540    // contention on the lock...
541    base::AutoLock locked(lookup_lock_);
542    full_browse_hashes_.clear();
543    pending_browse_hashes_.clear();
544    LoadPrefixSet();
545  }
546
547  if (download_store_.get()) {
548    download_filename_ = DownloadDBFilename(filename_base);
549    download_store_->Init(
550        download_filename_,
551        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
552                   base::Unretained(this)));
553    DVLOG(1) << "Init download store: " << download_filename_.value();
554  }
555
556  if (csd_whitelist_store_.get()) {
557    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
558    csd_whitelist_store_->Init(
559        csd_whitelist_filename_,
560        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
561                   base::Unretained(this)));
562    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
563    std::vector<SBAddFullHash> full_hashes;
564    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
565      LoadWhitelist(full_hashes, &csd_whitelist_);
566    } else {
567      WhitelistEverything(&csd_whitelist_);
568    }
569  } else {
570    WhitelistEverything(&csd_whitelist_);  // Just to be safe.
571  }
572
573  if (download_whitelist_store_.get()) {
574    download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
575    download_whitelist_store_->Init(
576        download_whitelist_filename_,
577        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
578                   base::Unretained(this)));
579    DVLOG(1) << "Init download whitelist store: "
580             << download_whitelist_filename_.value();
581    std::vector<SBAddFullHash> full_hashes;
582    if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
583      LoadWhitelist(full_hashes, &download_whitelist_);
584    } else {
585      WhitelistEverything(&download_whitelist_);
586    }
587  } else {
588    WhitelistEverything(&download_whitelist_);  // Just to be safe.
589  }
590
591  if (extension_blacklist_store_.get()) {
592    extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
593    extension_blacklist_store_->Init(
594        extension_blacklist_filename_,
595        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
596                   base::Unretained(this)));
597    DVLOG(1) << "Init extension blacklist store: "
598             << extension_blacklist_filename_.value();
599  }
600
601  if (side_effect_free_whitelist_store_.get()) {
602    side_effect_free_whitelist_filename_ =
603        SideEffectFreeWhitelistDBFilename(filename_base);
604    side_effect_free_whitelist_prefix_set_filename_ =
605        PrefixSetForFilename(side_effect_free_whitelist_filename_);
606    side_effect_free_whitelist_store_->Init(
607        side_effect_free_whitelist_filename_,
608        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
609                   base::Unretained(this)));
610    DVLOG(1) << "Init side-effect free whitelist store: "
611             << side_effect_free_whitelist_filename_.value();
612
613    // If there is no database, the filter cannot be used.
614    base::PlatformFileInfo db_info;
615    if (file_util::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
616        && db_info.size != 0) {
617      const base::TimeTicks before = base::TimeTicks::Now();
618      side_effect_free_whitelist_prefix_set_.reset(
619          safe_browsing::PrefixSet::LoadFile(
620              side_effect_free_whitelist_prefix_set_filename_));
621      DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
622               << "prefix set in "
623               << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
624      UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
625                          base::TimeTicks::Now() - before);
626      if (!side_effect_free_whitelist_prefix_set_.get())
627        RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
628    }
629  } else {
630    // Delete any files of the side-effect free sidelist that may be around
631    // from when it was previously enabled.
632    SafeBrowsingStoreFile::DeleteStore(
633        SideEffectFreeWhitelistDBFilename(filename_base));
634  }
635
636  if (ip_blacklist_store_.get()) {
637    ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base);
638    ip_blacklist_store_->Init(
639        ip_blacklist_filename_,
640        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
641                   base::Unretained(this)));
642    DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
643             << ip_blacklist_filename_.value();
644    std::vector<SBAddFullHash> full_hashes;
645    if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
646      LoadIpBlacklist(full_hashes);
647    } else {
648      DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
649      LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
650    }
651  }
652}
653
654bool SafeBrowsingDatabaseNew::ResetDatabase() {
655  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
656
657  // Delete files on disk.
658  // TODO(shess): Hard to see where one might want to delete without a
659  // reset.  Perhaps inline |Delete()|?
660  if (!Delete())
661    return false;
662
663  // Reset objects in memory.
664  {
665    base::AutoLock locked(lookup_lock_);
666    full_browse_hashes_.clear();
667    pending_browse_hashes_.clear();
668    prefix_miss_cache_.clear();
669    browse_prefix_set_.reset();
670    side_effect_free_whitelist_prefix_set_.reset();
671    ip_blacklist_.clear();
672  }
673  // Wants to acquire the lock itself.
674  WhitelistEverything(&csd_whitelist_);
675  WhitelistEverything(&download_whitelist_);
676  return true;
677}
678
679// TODO(lzheng): Remove matching_list, it is not used anywhere.
680bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
681    const GURL& url,
682    std::string* matching_list,
683    std::vector<SBPrefix>* prefix_hits,
684    std::vector<SBFullHashResult>* full_hits,
685    base::Time last_update) {
686  // Clear the results first.
687  matching_list->clear();
688  prefix_hits->clear();
689  full_hits->clear();
690
691  std::vector<SBFullHash> full_hashes;
692  BrowseFullHashesToCheck(url, false, &full_hashes);
693  if (full_hashes.empty())
694    return false;
695
696  // This function is called on the I/O thread, prevent changes to
697  // filter and caches.
698  base::AutoLock locked(lookup_lock_);
699
700  // |browse_prefix_set_| is empty until it is either read from disk, or the
701  // first update populates it.  Bail out without a hit if not yet
702  // available.
703  if (!browse_prefix_set_.get())
704    return false;
705
706  size_t miss_count = 0;
707  for (size_t i = 0; i < full_hashes.size(); ++i) {
708    const SBPrefix prefix = full_hashes[i].prefix;
709    if (browse_prefix_set_->Exists(prefix)) {
710      prefix_hits->push_back(prefix);
711      if (prefix_miss_cache_.count(prefix) > 0)
712        ++miss_count;
713    }
714  }
715
716  // If all the prefixes are cached as 'misses', don't issue a GetHash.
717  if (miss_count == prefix_hits->size())
718    return false;
719
720  // Find the matching full-hash results.  |full_browse_hashes_| are from the
721  // database, |pending_browse_hashes_| are from GetHash requests between
722  // updates.
723  std::sort(prefix_hits->begin(), prefix_hits->end());
724
725  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
726                               full_hits, last_update);
727  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
728                               full_hits, last_update);
729  return true;
730}
731
732bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
733    const std::vector<GURL>& urls,
734    std::vector<SBPrefix>* prefix_hits) {
735  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
736
737  // Ignore this check when download checking is not enabled.
738  if (!download_store_.get())
739    return false;
740
741  std::vector<SBPrefix> prefixes;
742  GetDownloadUrlPrefixes(urls, &prefixes);
743  return MatchAddPrefixes(download_store_.get(),
744                          safe_browsing_util::BINURL % 2,
745                          prefixes,
746                          prefix_hits);
747}
748
749bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
750    const SBPrefix& prefix) {
751  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
752
753  // Ignore this check when download store is not available.
754  if (!download_store_.get())
755    return false;
756
757  std::vector<SBPrefix> prefix_hits;
758  return MatchAddPrefixes(download_store_.get(),
759                          safe_browsing_util::BINHASH % 2,
760                          std::vector<SBPrefix>(1, prefix),
761                          &prefix_hits);
762}
763
764bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
765  // This method is theoretically thread-safe but we expect all calls to
766  // originate from the IO thread.
767  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
768  std::vector<SBFullHash> full_hashes;
769  BrowseFullHashesToCheck(url, true, &full_hashes);
770  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
771}
772
773bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
774  std::vector<SBFullHash> full_hashes;
775  BrowseFullHashesToCheck(url, true, &full_hashes);
776  return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
777}
778
779bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
780    const std::vector<SBPrefix>& prefixes,
781    std::vector<SBPrefix>* prefix_hits) {
782  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
783  if (!extension_blacklist_store_)
784    return false;
785
786  return MatchAddPrefixes(extension_blacklist_store_.get(),
787                          safe_browsing_util::EXTENSIONBLACKLIST % 2,
788                          prefixes,
789                          prefix_hits);
790}
791
792bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
793    const GURL& url) {
794  SBFullHash full_hash;
795  std::string host;
796  std::string path;
797  std::string query;
798  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
799  std::string url_to_check = host + path;
800  if (!query.empty())
801    url_to_check +=  "?" + query;
802  crypto::SHA256HashString(url_to_check, &full_hash, sizeof(full_hash));
803
804  // This function can be called on any thread, so lock against any changes
805  base::AutoLock locked(lookup_lock_);
806
807  // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
808  // from disk, or the first update populates it.  Bail out without a hit if
809  // not yet available.
810  if (!side_effect_free_whitelist_prefix_set_.get())
811    return false;
812
813  return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix);
814}
815
816bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
817  net::IPAddressNumber ip_number;
818  if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
819    DVLOG(2) << "Unable to parse IP address: '" << ip_address << "'";
820    return false;
821  }
822  if (ip_number.size() == net::kIPv4AddressSize) {
823    ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
824  }
825  if (ip_number.size() != net::kIPv6AddressSize) {
826    DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
827             << ip_address << "'";
828    return false;  // better safe than sorry.
829  }
830  // This function can be called from any thread.
831  base::AutoLock locked(lookup_lock_);
832  for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
833       it != ip_blacklist_.end();
834       ++it) {
835    const std::string& mask = it->first;
836    DCHECK_EQ(mask.size(), ip_number.size());
837    std::string subnet(net::kIPv6AddressSize, '\0');
838    for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
839      subnet[i] = ip_number[i] & mask[i];
840    }
841    const std::string hash = base::SHA1HashString(subnet);
842    DVLOG(2) << "Lookup Malware IP: "
843             << " ip:" << ip_address
844             << " mask:" << base::HexEncode(mask.data(), mask.size())
845             << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
846             << " hash:" << base::HexEncode(hash.data(), hash.size());
847    if (it->second.count(hash) > 0) {
848      return true;
849    }
850  }
851  return false;
852}
853
854bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
855    const std::string& str) {
856  SBFullHash hash;
857  crypto::SHA256HashString(str, &hash, sizeof(hash));
858  std::vector<SBFullHash> hashes;
859  hashes.push_back(hash);
860  return ContainsWhitelistedHashes(download_whitelist_, hashes);
861}
862
863bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
864    const SBWhitelist& whitelist,
865    const std::vector<SBFullHash>& hashes) {
866  base::AutoLock l(lookup_lock_);
867  if (whitelist.second)
868    return true;
869  for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
870       it != hashes.end(); ++it) {
871    if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it))
872      return true;
873  }
874  return false;
875}
876
877// Helper to insert entries for all of the prefixes or full hashes in
878// |entry| into the store.
879void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
880                                        const SBEntry* entry, int list_id) {
881  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
882
883  SafeBrowsingStore* store = GetStore(list_id);
884  if (!store) return;
885
886  STATS_COUNTER("SB.HostInsert", 1);
887  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
888  const int count = entry->prefix_count();
889
890  DCHECK(!entry->IsSub());
891  if (!count) {
892    // No prefixes, use host instead.
893    STATS_COUNTER("SB.PrefixAdd", 1);
894    store->WriteAddPrefix(encoded_chunk_id, host);
895  } else if (entry->IsPrefix()) {
896    // Prefixes only.
897    for (int i = 0; i < count; i++) {
898      const SBPrefix prefix = entry->PrefixAt(i);
899      STATS_COUNTER("SB.PrefixAdd", 1);
900      store->WriteAddPrefix(encoded_chunk_id, prefix);
901    }
902  } else {
903    // Prefixes and hashes.
904    const base::Time receive_time = base::Time::Now();
905    for (int i = 0; i < count; ++i) {
906      const SBFullHash full_hash = entry->FullHashAt(i);
907      const SBPrefix prefix = full_hash.prefix;
908
909      STATS_COUNTER("SB.PrefixAdd", 1);
910      store->WriteAddPrefix(encoded_chunk_id, prefix);
911
912      STATS_COUNTER("SB.PrefixAddFull", 1);
913      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
914    }
915  }
916}
917
918// Helper to iterate over all the entries in the hosts in |chunks| and
919// add them to the store.
920void SafeBrowsingDatabaseNew::InsertAddChunks(
921    const safe_browsing_util::ListType list_id,
922    const SBChunkList& chunks) {
923  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
924
925  SafeBrowsingStore* store = GetStore(list_id);
926  if (!store) return;
927
928  for (SBChunkList::const_iterator citer = chunks.begin();
929       citer != chunks.end(); ++citer) {
930    const int chunk_id = citer->chunk_number;
931
932    // The server can give us a chunk that we already have because
933    // it's part of a range.  Don't add it again.
934    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
935    if (store->CheckAddChunk(encoded_chunk_id))
936      continue;
937
938    store->SetAddChunk(encoded_chunk_id);
939    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
940         hiter != citer->hosts.end(); ++hiter) {
941      // NOTE: Could pass |encoded_chunk_id|, but then inserting add
942      // chunks would look different from inserting sub chunks.
943      InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
944    }
945  }
946}
947
948// Helper to insert entries for all of the prefixes or full hashes in
949// |entry| into the store.
950void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
951                                        const SBEntry* entry, int list_id) {
952  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
953
954  SafeBrowsingStore* store = GetStore(list_id);
955  if (!store) return;
956
957  STATS_COUNTER("SB.HostDelete", 1);
958  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
959  const int count = entry->prefix_count();
960
961  DCHECK(entry->IsSub());
962  if (!count) {
963    // No prefixes, use host instead.
964    STATS_COUNTER("SB.PrefixSub", 1);
965    const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
966    store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
967  } else if (entry->IsPrefix()) {
968    // Prefixes only.
969    for (int i = 0; i < count; i++) {
970      const SBPrefix prefix = entry->PrefixAt(i);
971      const int add_chunk_id =
972          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
973
974      STATS_COUNTER("SB.PrefixSub", 1);
975      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
976    }
977  } else {
978    // Prefixes and hashes.
979    for (int i = 0; i < count; ++i) {
980      const SBFullHash full_hash = entry->FullHashAt(i);
981      const int add_chunk_id =
982          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
983
984      STATS_COUNTER("SB.PrefixSub", 1);
985      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
986
987      STATS_COUNTER("SB.PrefixSubFull", 1);
988      store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
989    }
990  }
991}
992
993// Helper to iterate over all the entries in the hosts in |chunks| and
994// add them to the store.
995void SafeBrowsingDatabaseNew::InsertSubChunks(
996    safe_browsing_util::ListType list_id,
997    const SBChunkList& chunks) {
998  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
999
1000  SafeBrowsingStore* store = GetStore(list_id);
1001  if (!store) return;
1002
1003  for (SBChunkList::const_iterator citer = chunks.begin();
1004       citer != chunks.end(); ++citer) {
1005    const int chunk_id = citer->chunk_number;
1006
1007    // The server can give us a chunk that we already have because
1008    // it's part of a range.  Don't add it again.
1009    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
1010    if (store->CheckSubChunk(encoded_chunk_id))
1011      continue;
1012
1013    store->SetSubChunk(encoded_chunk_id);
1014    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
1015         hiter != citer->hosts.end(); ++hiter) {
1016      InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
1017    }
1018  }
1019}
1020
1021void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
1022                                           const SBChunkList& chunks) {
1023  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1024
1025  if (corruption_detected_ || chunks.empty())
1026    return;
1027
1028  const base::TimeTicks before = base::TimeTicks::Now();
1029
1030  const safe_browsing_util::ListType list_id =
1031      safe_browsing_util::GetListId(list_name);
1032  DVLOG(2) << list_name << ": " << list_id;
1033
1034  SafeBrowsingStore* store = GetStore(list_id);
1035  if (!store) return;
1036
1037  change_detected_ = true;
1038
1039  store->BeginChunk();
1040  if (chunks.front().is_add) {
1041    InsertAddChunks(list_id, chunks);
1042  } else {
1043    InsertSubChunks(list_id, chunks);
1044  }
1045  store->FinishChunk();
1046
1047  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
1048}
1049
1050void SafeBrowsingDatabaseNew::DeleteChunks(
1051    const std::vector<SBChunkDelete>& chunk_deletes) {
1052  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1053
1054  if (corruption_detected_ || chunk_deletes.empty())
1055    return;
1056
1057  const std::string& list_name = chunk_deletes.front().list_name;
1058  const safe_browsing_util::ListType list_id =
1059      safe_browsing_util::GetListId(list_name);
1060
1061  SafeBrowsingStore* store = GetStore(list_id);
1062  if (!store) return;
1063
1064  change_detected_ = true;
1065
1066  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
1067    std::vector<int> chunk_numbers;
1068    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
1069    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
1070      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
1071      if (chunk_deletes[i].is_sub_del)
1072        store->DeleteSubChunk(encoded_chunk_id);
1073      else
1074        store->DeleteAddChunk(encoded_chunk_id);
1075    }
1076  }
1077}
1078
1079void SafeBrowsingDatabaseNew::CacheHashResults(
1080    const std::vector<SBPrefix>& prefixes,
1081    const std::vector<SBFullHashResult>& full_hits) {
1082  // This is called on the I/O thread, lock against updates.
1083  base::AutoLock locked(lookup_lock_);
1084
1085  if (full_hits.empty()) {
1086    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
1087    return;
1088  }
1089
1090  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
1091  // Refactor to make them identical.
1092  const base::Time now = base::Time::Now();
1093  const size_t orig_size = pending_browse_hashes_.size();
1094  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
1095       iter != full_hits.end(); ++iter) {
1096    const int list_id = safe_browsing_util::GetListId(iter->list_name);
1097    if (list_id == safe_browsing_util::MALWARE ||
1098        list_id == safe_browsing_util::PHISH) {
1099      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1100      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1101      pending_browse_hashes_.push_back(add_full_hash);
1102    }
1103  }
1104
1105  // Sort new entries then merge with the previously-sorted entries.
1106  std::vector<SBAddFullHash>::iterator
1107      orig_end = pending_browse_hashes_.begin() + orig_size;
1108  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1109  std::inplace_merge(pending_browse_hashes_.begin(),
1110                     orig_end, pending_browse_hashes_.end(),
1111                     SBAddFullHashPrefixLess);
1112}
1113
1114bool SafeBrowsingDatabaseNew::UpdateStarted(
1115    std::vector<SBListChunkRanges>* lists) {
1116  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1117  DCHECK(lists);
1118
1119  // If |BeginUpdate()| fails, reset the database.
1120  if (!browse_store_->BeginUpdate()) {
1121    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1122    HandleCorruptDatabase();
1123    return false;
1124  }
1125
1126  if (download_store_.get() && !download_store_->BeginUpdate()) {
1127    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1128    HandleCorruptDatabase();
1129    return false;
1130  }
1131
1132  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1133    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1134    HandleCorruptDatabase();
1135    return false;
1136  }
1137
1138  if (download_whitelist_store_.get() &&
1139      !download_whitelist_store_->BeginUpdate()) {
1140    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1141    HandleCorruptDatabase();
1142    return false;
1143  }
1144
1145  if (extension_blacklist_store_ &&
1146      !extension_blacklist_store_->BeginUpdate()) {
1147    RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1148    HandleCorruptDatabase();
1149    return false;
1150  }
1151
1152  if (side_effect_free_whitelist_store_ &&
1153      !side_effect_free_whitelist_store_->BeginUpdate()) {
1154    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1155    HandleCorruptDatabase();
1156    return false;
1157  }
1158
1159  if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1160    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1161    HandleCorruptDatabase();
1162    return false;
1163  }
1164
1165  std::vector<std::string> browse_listnames;
1166  browse_listnames.push_back(safe_browsing_util::kMalwareList);
1167  browse_listnames.push_back(safe_browsing_util::kPhishingList);
1168  UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);
1169
1170  if (download_store_.get()) {
1171    // This store used to contain kBinHashList in addition to
1172    // kBinUrlList.  Strip the stale data before generating the chunk
1173    // ranges to request.  UpdateChunkRanges() will traverse the chunk
1174    // list, so this is very cheap if there are no kBinHashList chunks.
1175    const int listid =
1176        safe_browsing_util::GetListId(safe_browsing_util::kBinHashList);
1177    DeleteChunksFromStore(download_store_.get(), listid);
1178
1179    // The above marks the chunks for deletion, but they are not
1180    // actually deleted until the database is rewritten.  The
1181    // following code removes the kBinHashList part of the request
1182    // before continuing so that UpdateChunkRanges() doesn't break.
1183    std::vector<std::string> download_listnames;
1184    download_listnames.push_back(safe_browsing_util::kBinUrlList);
1185    download_listnames.push_back(safe_browsing_util::kBinHashList);
1186    UpdateChunkRanges(download_store_.get(), download_listnames, lists);
1187    DCHECK_EQ(lists->back().name,
1188              std::string(safe_browsing_util::kBinHashList));
1189    lists->pop_back();
1190
1191    // TODO(shess): This problem could also be handled in
1192    // BeginUpdate() by detecting the chunks to delete and rewriting
1193    // the database before it's used.  When I implemented that, it
1194    // felt brittle, it might be easier to just wait for some future
1195    // format change.
1196  }
1197
1198  if (csd_whitelist_store_.get()) {
1199    std::vector<std::string> csd_whitelist_listnames;
1200    csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
1201    UpdateChunkRanges(csd_whitelist_store_.get(),
1202                      csd_whitelist_listnames, lists);
1203  }
1204
1205  if (download_whitelist_store_.get()) {
1206    std::vector<std::string> download_whitelist_listnames;
1207    download_whitelist_listnames.push_back(
1208        safe_browsing_util::kDownloadWhiteList);
1209    UpdateChunkRanges(download_whitelist_store_.get(),
1210                      download_whitelist_listnames, lists);
1211  }
1212
1213  if (extension_blacklist_store_) {
1214    UpdateChunkRanges(
1215        extension_blacklist_store_.get(),
1216        std::vector<std::string>(1, safe_browsing_util::kExtensionBlacklist),
1217        lists);
1218  }
1219
1220  if (side_effect_free_whitelist_store_) {
1221    UpdateChunkRanges(
1222        side_effect_free_whitelist_store_.get(),
1223        std::vector<std::string>(
1224            1, safe_browsing_util::kSideEffectFreeWhitelist),
1225        lists);
1226  }
1227
1228  if (ip_blacklist_store_) {
1229    UpdateChunkRanges(
1230        ip_blacklist_store_.get(),
1231        std::vector<std::string>(1, safe_browsing_util::kIPBlacklist),
1232        lists);
1233  }
1234
1235  corruption_detected_ = false;
1236  change_detected_ = false;
1237  return true;
1238}
1239
1240void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1241  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1242
1243  // The update may have failed due to corrupt storage (for instance,
1244  // an excessive number of invalid add_chunks and sub_chunks).
1245  // Double-check that the databases are valid.
1246  // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1247  // sections would allow throwing a corruption error in
1248  // UpdateStarted().
1249  if (!update_succeeded) {
1250    if (!browse_store_->CheckValidity())
1251      DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1252
1253    if (download_store_.get() && !download_store_->CheckValidity())
1254      DLOG(ERROR) << "Safe-browsing download database corrupt.";
1255
1256    if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1257      DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1258
1259    if (download_whitelist_store_.get() &&
1260        !download_whitelist_store_->CheckValidity()) {
1261      DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1262    }
1263
1264    if (extension_blacklist_store_ &&
1265        !extension_blacklist_store_->CheckValidity()) {
1266      DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1267    }
1268
1269    if (side_effect_free_whitelist_store_ &&
1270        !side_effect_free_whitelist_store_->CheckValidity()) {
1271      DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1272                  << "corrupt.";
1273    }
1274
1275    if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1276      DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1277    }
1278  }
1279
1280  if (corruption_detected_)
1281    return;
1282
1283  // Unroll the transaction if there was a protocol error or if the
1284  // transaction was empty.  This will leave the prefix set, the
1285  // pending hashes, and the prefix miss cache in place.
1286  if (!update_succeeded || !change_detected_) {
1287    // Track empty updates to answer questions at http://crbug.com/72216 .
1288    if (update_succeeded && !change_detected_)
1289      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1290    browse_store_->CancelUpdate();
1291    if (download_store_.get())
1292      download_store_->CancelUpdate();
1293    if (csd_whitelist_store_.get())
1294      csd_whitelist_store_->CancelUpdate();
1295    if (download_whitelist_store_.get())
1296      download_whitelist_store_->CancelUpdate();
1297    if (extension_blacklist_store_)
1298      extension_blacklist_store_->CancelUpdate();
1299    if (side_effect_free_whitelist_store_)
1300      side_effect_free_whitelist_store_->CancelUpdate();
1301    if (ip_blacklist_store_)
1302      ip_blacklist_store_->CancelUpdate();
1303    return;
1304  }
1305
1306  if (download_store_) {
1307    int64 size_bytes = UpdateHashPrefixStore(
1308        download_filename_,
1309        download_store_.get(),
1310        FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1311    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1312                         static_cast<int>(size_bytes / 1024));
1313  }
1314
1315  UpdateBrowseStore();
1316  UpdateWhitelistStore(csd_whitelist_filename_,
1317                       csd_whitelist_store_.get(),
1318                       &csd_whitelist_);
1319  UpdateWhitelistStore(download_whitelist_filename_,
1320                       download_whitelist_store_.get(),
1321                       &download_whitelist_);
1322
1323  if (extension_blacklist_store_) {
1324    int64 size_bytes = UpdateHashPrefixStore(
1325        extension_blacklist_filename_,
1326        extension_blacklist_store_.get(),
1327        FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1328    UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1329                         static_cast<int>(size_bytes / 1024));
1330  }
1331
1332  if (side_effect_free_whitelist_store_)
1333    UpdateSideEffectFreeWhitelistStore();
1334
1335  if (ip_blacklist_store_)
1336    UpdateIpBlacklistStore();
1337}
1338
1339void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1340    const base::FilePath& store_filename,
1341    SafeBrowsingStore* store,
1342    SBWhitelist* whitelist) {
1343  if (!store)
1344    return;
1345
1346  // For the whitelists, we don't cache and save full hashes since all
1347  // hashes are already full.
1348  std::vector<SBAddFullHash> empty_add_hashes;
1349
1350  // Not needed for the whitelists.
1351  std::set<SBPrefix> empty_miss_cache;
1352
1353  // Note: prefixes will not be empty.  The current data store implementation
1354  // stores all full-length hashes as both full and prefix hashes.
1355  SBAddPrefixes prefixes;
1356  std::vector<SBAddFullHash> full_hashes;
1357  if (!store->FinishUpdate(empty_add_hashes, empty_miss_cache, &prefixes,
1358                           &full_hashes)) {
1359    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1360    WhitelistEverything(whitelist);
1361    return;
1362  }
1363
1364#if defined(OS_MACOSX)
1365  base::mac::SetFileBackupExclusion(store_filename);
1366#endif
1367
1368  LoadWhitelist(full_hashes, whitelist);
1369}
1370
1371int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1372    const base::FilePath& store_filename,
1373    SafeBrowsingStore* store,
1374    FailureType failure_type) {
1375  // We don't cache and save full hashes.
1376  std::vector<SBAddFullHash> empty_add_hashes;
1377
1378  // Backend lookup happens only if a prefix is in add list.
1379  std::set<SBPrefix> empty_miss_cache;
1380
1381  // These results are not used after this call. Simply ignore the
1382  // returned value after FinishUpdate(...).
1383  SBAddPrefixes add_prefixes_result;
1384  std::vector<SBAddFullHash> add_full_hashes_result;
1385
1386  if (!store->FinishUpdate(empty_add_hashes,
1387                           empty_miss_cache,
1388                           &add_prefixes_result,
1389                           &add_full_hashes_result)) {
1390    RecordFailure(failure_type);
1391  }
1392
1393#if defined(OS_MACOSX)
1394  base::mac::SetFileBackupExclusion(store_filename);
1395#endif
1396
1397  return GetFileSizeOrZero(store_filename);
1398}
1399
1400void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1401  // Copy out the pending add hashes.  Copy rather than swapping in
1402  // case |ContainsBrowseURL()| is called before the new filter is complete.
1403  std::vector<SBAddFullHash> pending_add_hashes;
1404  {
1405    base::AutoLock locked(lookup_lock_);
1406    pending_add_hashes.insert(pending_add_hashes.end(),
1407                              pending_browse_hashes_.begin(),
1408                              pending_browse_hashes_.end());
1409  }
1410
1411  // Measure the amount of IO during the filter build.
1412  base::IoCounters io_before, io_after;
1413  base::ProcessHandle handle = base::Process::Current().handle();
1414  scoped_ptr<base::ProcessMetrics> metric(
1415#if !defined(OS_MACOSX)
1416      base::ProcessMetrics::CreateProcessMetrics(handle)
1417#else
1418      // Getting stats only for the current process is enough, so NULL is fine.
1419      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1420#endif
1421  );
1422
1423  // IoCounters are currently not supported on Mac, and may not be
1424  // available for Linux, so we check the result and only show IO
1425  // stats if they are available.
1426  const bool got_counters = metric->GetIOCounters(&io_before);
1427
1428  const base::TimeTicks before = base::TimeTicks::Now();
1429
1430  SBAddPrefixes add_prefixes;
1431  std::vector<SBAddFullHash> add_full_hashes;
1432  if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
1433                                   &add_prefixes, &add_full_hashes)) {
1434    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1435    return;
1436  }
1437
1438  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1439  // could be passed directly to |PrefixSet()|, removing the need for
1440  // |prefixes|.  For now, |prefixes| is useful while debugging
1441  // things.
1442  std::vector<SBPrefix> prefixes;
1443  prefixes.reserve(add_prefixes.size());
1444  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1445       iter != add_prefixes.end(); ++iter) {
1446    prefixes.push_back(iter->prefix);
1447  }
1448
1449  std::sort(prefixes.begin(), prefixes.end());
1450  scoped_ptr<safe_browsing::PrefixSet>
1451      prefix_set(new safe_browsing::PrefixSet(prefixes));
1452
1453  // This needs to be in sorted order by prefix for efficient access.
1454  std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1455            SBAddFullHashPrefixLess);
1456
1457  // Swap in the newly built filter and cache.
1458  {
1459    base::AutoLock locked(lookup_lock_);
1460    full_browse_hashes_.swap(add_full_hashes);
1461
1462    // TODO(shess): If |CacheHashResults()| is posted between the
1463    // earlier lock and this clear, those pending hashes will be lost.
1464    // It could be fixed by only removing hashes which were collected
1465    // at the earlier point.  I believe that is fail-safe as-is (the
1466    // hash will be fetched again).
1467    pending_browse_hashes_.clear();
1468    prefix_miss_cache_.clear();
1469    browse_prefix_set_.swap(prefix_set);
1470  }
1471
1472  DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1473           << (base::TimeTicks::Now() - before).InMilliseconds()
1474           << " ms total.  prefix count: " << add_prefixes.size();
1475  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1476
1477  // Persist the prefix set to disk.  Since only this thread changes
1478  // |browse_prefix_set_|, there is no need to lock.
1479  WritePrefixSet();
1480
1481  // Gather statistics.
1482  if (got_counters && metric->GetIOCounters(&io_after)) {
1483    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1484                         static_cast<int>(io_after.ReadTransferCount -
1485                                          io_before.ReadTransferCount) / 1024);
1486    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1487                         static_cast<int>(io_after.WriteTransferCount -
1488                                          io_before.WriteTransferCount) / 1024);
1489    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1490                         static_cast<int>(io_after.ReadOperationCount -
1491                                          io_before.ReadOperationCount));
1492    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1493                         static_cast<int>(io_after.WriteOperationCount -
1494                                          io_before.WriteOperationCount));
1495  }
1496
1497  int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1498  UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1499                       static_cast<int>(file_size / 1024));
1500  file_size = GetFileSizeOrZero(browse_filename_);
1501  UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1502                       static_cast<int>(file_size / 1024));
1503
1504#if defined(OS_MACOSX)
1505  base::mac::SetFileBackupExclusion(browse_filename_);
1506#endif
1507}
1508
1509void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1510  std::vector<SBAddFullHash> empty_add_hashes;
1511  std::set<SBPrefix> empty_miss_cache;
1512  SBAddPrefixes add_prefixes;
1513  std::vector<SBAddFullHash> add_full_hashes_result;
1514
1515  if (!side_effect_free_whitelist_store_->FinishUpdate(
1516          empty_add_hashes,
1517          empty_miss_cache,
1518          &add_prefixes,
1519          &add_full_hashes_result)) {
1520    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1521    return;
1522  }
1523
1524  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1525  // could be passed directly to |PrefixSet()|, removing the need for
1526  // |prefixes|.  For now, |prefixes| is useful while debugging
1527  // things.
1528  std::vector<SBPrefix> prefixes;
1529  prefixes.reserve(add_prefixes.size());
1530  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1531       iter != add_prefixes.end(); ++iter) {
1532    prefixes.push_back(iter->prefix);
1533  }
1534
1535  std::sort(prefixes.begin(), prefixes.end());
1536  scoped_ptr<safe_browsing::PrefixSet>
1537      prefix_set(new safe_browsing::PrefixSet(prefixes));
1538
1539  // Swap in the newly built prefix set.
1540  {
1541    base::AutoLock locked(lookup_lock_);
1542    side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1543  }
1544
1545  const base::TimeTicks before = base::TimeTicks::Now();
1546  const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1547      side_effect_free_whitelist_prefix_set_filename_);
1548  DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1549           << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
1550           << " ms";
1551  UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1552                      base::TimeTicks::Now() - before);
1553
1554  if (!write_ok)
1555    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1556
1557  // Gather statistics.
1558  int64 file_size = GetFileSizeOrZero(
1559      side_effect_free_whitelist_prefix_set_filename_);
1560  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1561                       static_cast<int>(file_size / 1024));
1562  file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1563  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1564                       static_cast<int>(file_size / 1024));
1565
1566#if defined(OS_MACOSX)
1567  base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1568  base::mac::SetFileBackupExclusion(
1569      side_effect_free_whitelist_prefix_set_filename_);
1570#endif
1571}
1572
1573void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1574  // For the IP blacklist, we don't cache and save full hashes since all
1575  // hashes are already full.
1576  std::vector<SBAddFullHash> empty_add_hashes;
1577
1578  // Not needed for the IP blacklist.
1579  std::set<SBPrefix> empty_miss_cache;
1580
1581  // Note: prefixes will not be empty.  The current data store implementation
1582  // stores all full-length hashes as both full and prefix hashes.
1583  SBAddPrefixes prefixes;
1584  std::vector<SBAddFullHash> full_hashes;
1585  if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes, empty_miss_cache,
1586                                         &prefixes, &full_hashes)) {
1587    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1588    LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
1589    return;
1590  }
1591
1592#if defined(OS_MACOSX)
1593  base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1594#endif
1595
1596  LoadIpBlacklist(full_hashes);
1597}
1598
1599void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1600  // Reset the database after the current task has unwound (but only
1601  // reset once within the scope of a given task).
1602  if (!reset_factory_.HasWeakPtrs()) {
1603    RecordFailure(FAILURE_DATABASE_CORRUPT);
1604    base::MessageLoop::current()->PostTask(FROM_HERE,
1605        base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1606                   reset_factory_.GetWeakPtr()));
1607  }
1608}
1609
1610void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1611  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1612  corruption_detected_ = true;  // Stop updating the database.
1613  ResetDatabase();
1614  DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1615}
1616
1617// TODO(shess): I'm not clear why this code doesn't have any
1618// real error-handling.
1619void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1620  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1621  DCHECK(!browse_prefix_set_filename_.empty());
1622
1623  // If there is no database, the filter cannot be used.
1624  base::PlatformFileInfo db_info;
1625  if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1626    return;
1627
1628  // Cleanup any stale bloom filter (no longer used).
1629  // TODO(shess): Track failure to delete?
1630  base::FilePath bloom_filter_filename =
1631      BloomFilterForFilename(browse_filename_);
1632  base::DeleteFile(bloom_filter_filename, false);
1633
1634  const base::TimeTicks before = base::TimeTicks::Now();
1635  browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(
1636      browse_prefix_set_filename_));
1637  DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1638           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1639  UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1640
1641  if (!browse_prefix_set_.get())
1642    RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1643}
1644
1645bool SafeBrowsingDatabaseNew::Delete() {
1646  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1647
1648  const bool r1 = browse_store_->Delete();
1649  if (!r1)
1650    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1651
1652  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1653  if (!r2)
1654    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1655
1656  const bool r3 = csd_whitelist_store_.get() ?
1657      csd_whitelist_store_->Delete() : true;
1658  if (!r3)
1659    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1660
1661  const bool r4 = download_whitelist_store_.get() ?
1662      download_whitelist_store_->Delete() : true;
1663  if (!r4)
1664    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1665
1666  base::FilePath bloom_filter_filename =
1667      BloomFilterForFilename(browse_filename_);
1668  const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1669  if (!r5)
1670    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1671
1672  const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1673  if (!r6)
1674    RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1675
1676  const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1677  if (!r7)
1678    RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1679
1680  const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1681                                    false);
1682  if (!r8)
1683    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1684
1685  const bool r9 = base::DeleteFile(
1686      side_effect_free_whitelist_prefix_set_filename_,
1687      false);
1688  if (!r9)
1689    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1690
1691  const bool r10 = base::DeleteFile(ip_blacklist_filename_, false);
1692  if (!r10)
1693    RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1694
1695  return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
1696}
1697
1698void SafeBrowsingDatabaseNew::WritePrefixSet() {
1699  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1700
1701  if (!browse_prefix_set_.get())
1702    return;
1703
1704  const base::TimeTicks before = base::TimeTicks::Now();
1705  const bool write_ok = browse_prefix_set_->WriteFile(
1706      browse_prefix_set_filename_);
1707  DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1708           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1709  UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1710
1711  if (!write_ok)
1712    RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1713
1714#if defined(OS_MACOSX)
1715  base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
1716#endif
1717}
1718
1719void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1720  base::AutoLock locked(lookup_lock_);
1721  whitelist->second = true;
1722  whitelist->first.clear();
1723}
1724
1725void SafeBrowsingDatabaseNew::LoadWhitelist(
1726    const std::vector<SBAddFullHash>& full_hashes,
1727    SBWhitelist* whitelist) {
1728  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1729  if (full_hashes.size() > kMaxWhitelistSize) {
1730    WhitelistEverything(whitelist);
1731    return;
1732  }
1733
1734  std::vector<SBFullHash> new_whitelist;
1735  new_whitelist.reserve(full_hashes.size());
1736  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1737       it != full_hashes.end(); ++it) {
1738    new_whitelist.push_back(it->full_hash);
1739  }
1740  std::sort(new_whitelist.begin(), new_whitelist.end());
1741
1742  SBFullHash kill_switch;
1743  crypto::SHA256HashString(kWhitelistKillSwitchUrl, &kill_switch,
1744                           sizeof(kill_switch));
1745  if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1746                         kill_switch)) {
1747    // The kill switch is whitelisted hence we whitelist all URLs.
1748    WhitelistEverything(whitelist);
1749  } else {
1750    base::AutoLock locked(lookup_lock_);
1751    whitelist->second = false;
1752    whitelist->first.swap(new_whitelist);
1753  }
1754}
1755
1756void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1757    const std::vector<SBAddFullHash>& full_hashes) {
1758  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1759  IPBlacklist new_blacklist;
1760  DVLOG(2) << "Writing IP blacklist of size: " << full_hashes.size();
1761  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1762       it != full_hashes.end();
1763       ++it) {
1764    const char* full_hash = it->full_hash.full_hash;
1765    DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1766    // The format of the IP blacklist is:
1767    // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1768    std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1769    size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1770    if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1771      DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size;
1772      RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1773      new_blacklist.clear();  // Load empty blacklist.
1774      break;
1775    }
1776
1777    // We precompute the mask for the given subnet size to speed up lookups.
1778    // Basically we need to create a 16B long string which has the highest
1779    // |size| bits sets to one.
1780    std::string mask(net::kIPv6AddressSize, '\0');
1781    mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1782    if ((prefix_size % 8) != 0) {
1783      mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1784    }
1785    DVLOG(2) << "Inserting malicious IP: "
1786             << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1787             << " mask:" << base::HexEncode(mask.data(), mask.size())
1788             << " prefix_size:" << prefix_size
1789             << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1790                                                 hashed_ip_prefix.size());
1791    new_blacklist[mask].insert(hashed_ip_prefix);
1792  }
1793
1794  base::AutoLock locked(lookup_lock_);
1795  ip_blacklist_.swap(new_blacklist);
1796}
1797
1798bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1799  SBFullHash malware_kill_switch;
1800  crypto::SHA256HashString(kMalwareIPKillSwitchUrl, &malware_kill_switch,
1801                           sizeof(malware_kill_switch));
1802  std::vector<SBFullHash> full_hashes;
1803  full_hashes.push_back(malware_kill_switch);
1804  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1805}
1806