1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7#include <algorithm>
8#include <iterator>
9
10#include "base/bind.h"
11#include "base/files/file_util.h"
12#include "base/message_loop/message_loop.h"
13#include "base/metrics/histogram.h"
14#include "base/metrics/stats_counters.h"
15#include "base/process/process.h"
16#include "base/process/process_metrics.h"
17#include "base/sha1.h"
18#include "base/strings/string_number_conversions.h"
19#include "base/strings/stringprintf.h"
20#include "base/time/time.h"
21#include "chrome/browser/safe_browsing/prefix_set.h"
22#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23#include "content/public/browser/browser_thread.h"
24#include "crypto/sha2.h"
25#include "net/base/net_util.h"
26#include "url/gurl.h"
27
28#if defined(OS_MACOSX)
29#include "base/mac/mac_util.h"
30#endif
31
32using content::BrowserThread;
33
34namespace {
35
36// Filename suffix for the bloom filter.
37const base::FilePath::CharType kBloomFilterFile[] =
38    FILE_PATH_LITERAL(" Filter 2");
39// Filename suffix for the prefix set.
40const base::FilePath::CharType kPrefixSetFile[] =
41    FILE_PATH_LITERAL(" Prefix Set");
42// Filename suffix for download store.
43const base::FilePath::CharType kDownloadDBFile[] =
44    FILE_PATH_LITERAL(" Download");
45// Filename suffix for client-side phishing detection whitelist store.
46const base::FilePath::CharType kCsdWhitelistDBFile[] =
47    FILE_PATH_LITERAL(" Csd Whitelist");
48// Filename suffix for the download whitelist store.
49const base::FilePath::CharType kDownloadWhitelistDBFile[] =
50    FILE_PATH_LITERAL(" Download Whitelist");
51// Filename suffix for the extension blacklist store.
52const base::FilePath::CharType kExtensionBlacklistDBFile[] =
53    FILE_PATH_LITERAL(" Extension Blacklist");
54// Filename suffix for the side-effect free whitelist store.
55const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
56    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57// Filename suffix for the csd malware IP blacklist store.
58const base::FilePath::CharType kIPBlacklistDBFile[] =
59    FILE_PATH_LITERAL(" IP Blacklist");
60
61// Filename suffix for browse store.
62// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63// Unfortunately, to change the name implies lots of transition code
64// for little benefit.  If/when file formats change (say to put all
65// the data in one file), that would be a convenient point to rectify
66// this.
67// TODO(shess): This shouldn't be OS-driven <http://crbug.com/394379>
68#if defined(OS_ANDROID)
69// NOTE(shess): This difference is also reflected in the list name in
70// safe_browsing_util.cc.
71// TODO(shess): Spin up an alternate list id which can be persisted in the
72// store.  Then if a mistake is made, it won't cause confusion between
73// incompatible lists.
74const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Mobile");
75#else
76const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
77#endif
78
// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups
// to the whitelist will be considered a match.
82const size_t kMaxWhitelistSize = 5000;
83
84// If the hash of this exact expression is on a whitelist then all
85// lookups to this whitelist will be considered a match.
86const char kWhitelistKillSwitchUrl[] =
87    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!
88
89// If the hash of this exact expression is on a whitelist then the
90// malware IP blacklisting feature will be disabled in csd.
91// Don't change this!
92const char kMalwareIPKillSwitchUrl[] =
93    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";
94
// Upper and lower bounds on an IP prefix size.
// NOTE(review): presumably these are prefix lengths in bits (128 matching an
// IPv6 address) used when loading the IP blacklist; the code that applies
// them is not visible in this part of the file -- confirm.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
97
98// To save space, the incoming |chunk_id| and |list_id| are combined
99// into an |encoded_chunk_id| for storage by shifting the |list_id|
100// into the low-order bits.  These functions decode that information.
101// TODO(lzheng): It was reasonable when database is saved in sqlite, but
102// there should be better ways to save chunk_id and list_id after we use
103// SafeBrowsingStoreFile.
int GetListIdBit(const int encoded_chunk_id) {
  // The list id occupies only the lowest bit of the encoded value.
  const int kListIdBitMask = 1;
  return encoded_chunk_id & kListIdBitMask;
}
int DecodeChunkId(int encoded_chunk_id) {
  // Shift out the list-id bit held in bit 0; what remains is the chunk id.
  encoded_chunk_id >>= 1;
  return encoded_chunk_id;
}
110int EncodeChunkId(const int chunk, const int list_id) {
111  DCHECK_NE(list_id, safe_browsing_util::INVALID);
112  return chunk << 1 | list_id % 2;
113}
114
115// Generate the set of full hashes to check for |url|.  If
116// |include_whitelist_hashes| is true we will generate additional path-prefixes
117// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
118// whitelist it should also match /foo/bar which is not the case for all the
119// other lists.  We'll also always add a pattern for the empty path.
120// TODO(shess): This function is almost the same as
121// |CompareFullHashes()| in safe_browsing_util.cc, except that code
122// does an early exit on match.  Since match should be the infrequent
123// case (phishing or malware found), consider combining this function
124// with that one.
125void BrowseFullHashesToCheck(const GURL& url,
126                             bool include_whitelist_hashes,
127                             std::vector<SBFullHash>* full_hashes) {
128  std::vector<std::string> hosts;
129  if (url.HostIsIPAddress()) {
130    hosts.push_back(url.host());
131  } else {
132    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
133  }
134
135  std::vector<std::string> paths;
136  safe_browsing_util::GeneratePathsToCheck(url, &paths);
137
138  for (size_t i = 0; i < hosts.size(); ++i) {
139    for (size_t j = 0; j < paths.size(); ++j) {
140      const std::string& path = paths[j];
141      full_hashes->push_back(SBFullHashForString(hosts[i] + path));
142
143      // We may have /foo as path-prefix in the whitelist which should
144      // also match with /foo/bar and /foo?bar.  Hence, for every path
145      // that ends in '/' we also add the path without the slash.
146      if (include_whitelist_hashes &&
147          path.size() > 1 &&
148          path[path.size() - 1] == '/') {
149        full_hashes->push_back(
150            SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
151      }
152    }
153  }
154}
155
156// Get the prefixes matching the download |urls|.
157void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
158                            std::vector<SBPrefix>* prefixes) {
159  std::vector<SBFullHash> full_hashes;
160  for (size_t i = 0; i < urls.size(); ++i)
161    BrowseFullHashesToCheck(urls[i], false, &full_hashes);
162
163  for (size_t i = 0; i < full_hashes.size(); ++i)
164    prefixes->push_back(full_hashes[i].prefix);
165}
166
167// Helper function to compare addprefixes in |store| with |prefixes|.
168// The |list_bit| indicates which list (url or hash) to compare.
169//
170// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
171// the actual matching prefixes.
172bool MatchAddPrefixes(SafeBrowsingStore* store,
173                      int list_bit,
174                      const std::vector<SBPrefix>& prefixes,
175                      std::vector<SBPrefix>* prefix_hits) {
176  prefix_hits->clear();
177  bool found_match = false;
178
179  SBAddPrefixes add_prefixes;
180  store->GetAddPrefixes(&add_prefixes);
181  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
182       iter != add_prefixes.end(); ++iter) {
183    for (size_t j = 0; j < prefixes.size(); ++j) {
184      const SBPrefix& prefix = prefixes[j];
185      if (prefix == iter->prefix &&
186          GetListIdBit(iter->chunk_id) == list_bit) {
187        prefix_hits->push_back(prefix);
188        found_match = true;
189      }
190    }
191  }
192  return found_match;
193}
194
195// This function generates a chunk range string for |chunks|. It
196// outputs one chunk range string per list and writes it to the
197// |list_ranges| vector.  We expect |list_ranges| to already be of the
198// right size.  E.g., if |chunks| contains chunks with two different
199// list ids then |list_ranges| must contain two elements.
200void GetChunkRanges(const std::vector<int>& chunks,
201                    std::vector<std::string>* list_ranges) {
202  // Since there are 2 possible list ids, there must be exactly two
203  // list ranges.  Even if the chunk data should only contain one
204  // line, this code has to somehow handle corruption.
205  DCHECK_EQ(2U, list_ranges->size());
206
207  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
208  for (std::vector<int>::const_iterator iter = chunks.begin();
209       iter != chunks.end(); ++iter) {
210    int mod_list_id = GetListIdBit(*iter);
211    DCHECK_GE(mod_list_id, 0);
212    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
213    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
214  }
215  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
216    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
217  }
218}
219
220// Helper function to create chunk range lists for Browse related
221// lists.
222void UpdateChunkRanges(SafeBrowsingStore* store,
223                       const std::vector<std::string>& listnames,
224                       std::vector<SBListChunkRanges>* lists) {
225  if (!store)
226    return;
227
228  DCHECK_GT(listnames.size(), 0U);
229  DCHECK_LE(listnames.size(), 2U);
230  std::vector<int> add_chunks;
231  std::vector<int> sub_chunks;
232  store->GetAddChunks(&add_chunks);
233  store->GetSubChunks(&sub_chunks);
234
235  // Always decode 2 ranges, even if only the first one is expected.
236  // The loop below will only load as many into |lists| as |listnames|
237  // indicates.
238  std::vector<std::string> adds(2);
239  std::vector<std::string> subs(2);
240  GetChunkRanges(add_chunks, &adds);
241  GetChunkRanges(sub_chunks, &subs);
242
243  for (size_t i = 0; i < listnames.size(); ++i) {
244    const std::string& listname = listnames[i];
245    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
246              static_cast<int>(i % 2));
247    DCHECK_NE(safe_browsing_util::GetListId(listname),
248              safe_browsing_util::INVALID);
249    lists->push_back(SBListChunkRanges(listname));
250    lists->back().adds.swap(adds[i]);
251    lists->back().subs.swap(subs[i]);
252  }
253}
254
255void UpdateChunkRangesForLists(SafeBrowsingStore* store,
256                               const std::string& listname0,
257                               const std::string& listname1,
258                               std::vector<SBListChunkRanges>* lists) {
259  std::vector<std::string> listnames;
260  listnames.push_back(listname0);
261  listnames.push_back(listname1);
262  UpdateChunkRanges(store, listnames, lists);
263}
264
265void UpdateChunkRangesForList(SafeBrowsingStore* store,
266                              const std::string& listname,
267                              std::vector<SBListChunkRanges>* lists) {
268  UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
269}
270
271// This code always checks for non-zero file size.  This helper makes
272// that less verbose.
273int64 GetFileSizeOrZero(const base::FilePath& file_path) {
274  int64 size_64;
275  if (!base::GetFileSize(file_path, &size_64))
276    return 0;
277  return size_64;
278}
279
280// Helper for ContainsBrowseUrlHashes().  Returns true if an un-expired match
281// for |full_hash| is found in |cache|, with any matches appended to |results|
282// (true can be returned with zero matches).  |expire_base| is used to check the
283// cache lifetime of matches, expired matches will be discarded from |cache|.
284bool GetCachedFullHash(std::map<SBPrefix, SBCachedFullHashResult>* cache,
285                       const SBFullHash& full_hash,
286                       const base::Time& expire_base,
287                       std::vector<SBFullHashResult>* results) {
288  // First check if there is a valid cached result for this prefix.
289  std::map<SBPrefix, SBCachedFullHashResult>::iterator
290      citer = cache->find(full_hash.prefix);
291  if (citer == cache->end())
292    return false;
293
294  // Remove expired entries.
295  SBCachedFullHashResult& cached_result = citer->second;
296  if (cached_result.expire_after <= expire_base) {
297    cache->erase(citer);
298    return false;
299  }
300
301  // Find full-hash matches.
302  std::vector<SBFullHashResult>& cached_hashes = cached_result.full_hashes;
303  for (size_t i = 0; i < cached_hashes.size(); ++i) {
304    if (SBFullHashEqual(full_hash, cached_hashes[i].hash))
305      results->push_back(cached_hashes[i]);
306  }
307
308  return true;
309}
310
311}  // namespace
312
313// The default SafeBrowsingDatabaseFactory.
314class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
315 public:
316  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
317      bool enable_download_protection,
318      bool enable_client_side_whitelist,
319      bool enable_download_whitelist,
320      bool enable_extension_blacklist,
321      bool enable_side_effect_free_whitelist,
322      bool enable_ip_blacklist) OVERRIDE {
323    return new SafeBrowsingDatabaseNew(
324        new SafeBrowsingStoreFile,
325        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
326        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
327        enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
328        enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
329        enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
330        enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
331  }
332
333  SafeBrowsingDatabaseFactoryImpl() { }
334
335 private:
336  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
337};
338
339// static
340SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
341
// Factory method, not thread-safe.  The caller has to make sure this is
// called on the SafeBrowsing thread.
344// TODO(shess): There's no need for a factory any longer.  Convert
345// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
346// callers just construct things directly.
347SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
348    bool enable_download_protection,
349    bool enable_client_side_whitelist,
350    bool enable_download_whitelist,
351    bool enable_extension_blacklist,
352    bool enable_side_effect_free_whitelist,
353    bool enable_ip_blacklist) {
354  if (!factory_)
355    factory_ = new SafeBrowsingDatabaseFactoryImpl();
356  return factory_->CreateSafeBrowsingDatabase(
357      enable_download_protection,
358      enable_client_side_whitelist,
359      enable_download_whitelist,
360      enable_extension_blacklist,
361      enable_side_effect_free_whitelist,
362      enable_ip_blacklist);
363}
364
365SafeBrowsingDatabase::~SafeBrowsingDatabase() {
366}
367
368// static
369base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
370    const base::FilePath& db_base_filename) {
371  return base::FilePath(db_base_filename.value() + kBrowseDBFile);
372}
373
374// static
375base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
376    const base::FilePath& db_base_filename) {
377  return base::FilePath(db_base_filename.value() + kDownloadDBFile);
378}
379
380// static
381base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
382    const base::FilePath& db_filename) {
383  return base::FilePath(db_filename.value() + kBloomFilterFile);
384}
385
386// static
387base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
388    const base::FilePath& db_filename) {
389  return base::FilePath(db_filename.value() + kPrefixSetFile);
390}
391
392// static
393base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
394    const base::FilePath& db_filename) {
395  return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
396}
397
398// static
399base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
400    const base::FilePath& db_filename) {
401  return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
402}
403
404// static
405base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
406    const base::FilePath& db_filename) {
407  return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
408}
409
410// static
411base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
412    const base::FilePath& db_filename) {
413  return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
414}
415
416// static
417base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
418    const base::FilePath& db_filename) {
419  return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
420}
421
422SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
423  if (list_id == safe_browsing_util::PHISH ||
424      list_id == safe_browsing_util::MALWARE) {
425    return browse_store_.get();
426  } else if (list_id == safe_browsing_util::BINURL) {
427    return download_store_.get();
428  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
429    return csd_whitelist_store_.get();
430  } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
431    return download_whitelist_store_.get();
432  } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
433    return extension_blacklist_store_.get();
434  } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
435    return side_effect_free_whitelist_store_.get();
436  } else if (list_id == safe_browsing_util::IPBLACKLIST) {
437    return ip_blacklist_store_.get();
438  }
439  return NULL;
440}
441
442// static
443void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
444  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
445                            FAILURE_DATABASE_MAX);
446}
447
448SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
449    : creation_loop_(base::MessageLoop::current()),
450      browse_store_(new SafeBrowsingStoreFile),
451      corruption_detected_(false),
452      change_detected_(false),
453      reset_factory_(this) {
454  DCHECK(browse_store_.get());
455  DCHECK(!download_store_.get());
456  DCHECK(!csd_whitelist_store_.get());
457  DCHECK(!download_whitelist_store_.get());
458  DCHECK(!extension_blacklist_store_.get());
459  DCHECK(!side_effect_free_whitelist_store_.get());
460  DCHECK(!ip_blacklist_store_.get());
461}
462
463SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
464    SafeBrowsingStore* browse_store,
465    SafeBrowsingStore* download_store,
466    SafeBrowsingStore* csd_whitelist_store,
467    SafeBrowsingStore* download_whitelist_store,
468    SafeBrowsingStore* extension_blacklist_store,
469    SafeBrowsingStore* side_effect_free_whitelist_store,
470    SafeBrowsingStore* ip_blacklist_store)
471    : creation_loop_(base::MessageLoop::current()),
472      browse_store_(browse_store),
473      download_store_(download_store),
474      csd_whitelist_store_(csd_whitelist_store),
475      download_whitelist_store_(download_whitelist_store),
476      extension_blacklist_store_(extension_blacklist_store),
477      side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
478      ip_blacklist_store_(ip_blacklist_store),
479      corruption_detected_(false),
480      reset_factory_(this) {
481  DCHECK(browse_store_.get());
482}
483
484SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
485  // The DCHECK is disabled due to crbug.com/338486 .
486  // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
487}
488
489void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
490  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
491
492  // This should not be run multiple times.
493  DCHECK(filename_base_.empty());
494
495  filename_base_ = filename_base;
496
497  // TODO(shess): The various stores are really only necessary while doing
498  // updates, or when querying a store directly (see |ContainsDownloadUrl()|).
499  // The store variables are also tested to see if a list is enabled.  Perhaps
500  // the stores could be refactored into an update object so that they are only
501  // live in memory while being actively used.  The sense of enabled probably
502  // belongs in protocol_manager or database_manager.
503
504  browse_store_->Init(
505      BrowseDBFilename(filename_base_),
506      base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
507                 base::Unretained(this)));
508
509  {
510    // NOTE: There is no need to grab the lock in this function, since
511    // until it returns, there are no pointers to this class on other
512    // threads.  Then again, that means there is no possibility of
513    // contention on the lock...
514    base::AutoLock locked(lookup_lock_);
515    browse_gethash_cache_.clear();
516    LoadPrefixSet();
517  }
518
519  if (download_store_.get()) {
520    download_store_->Init(
521        DownloadDBFilename(filename_base_),
522        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
523                   base::Unretained(this)));
524  }
525
526  if (csd_whitelist_store_.get()) {
527    csd_whitelist_store_->Init(
528        CsdWhitelistDBFilename(filename_base_),
529        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
530                   base::Unretained(this)));
531
532    std::vector<SBAddFullHash> full_hashes;
533    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
534      LoadWhitelist(full_hashes, &csd_whitelist_);
535    } else {
536      WhitelistEverything(&csd_whitelist_);
537    }
538  } else {
539    WhitelistEverything(&csd_whitelist_);  // Just to be safe.
540  }
541
542  if (download_whitelist_store_.get()) {
543    download_whitelist_store_->Init(
544        DownloadWhitelistDBFilename(filename_base_),
545        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
546                   base::Unretained(this)));
547
548    std::vector<SBAddFullHash> full_hashes;
549    if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
550      LoadWhitelist(full_hashes, &download_whitelist_);
551    } else {
552      WhitelistEverything(&download_whitelist_);
553    }
554  } else {
555    WhitelistEverything(&download_whitelist_);  // Just to be safe.
556  }
557
558  if (extension_blacklist_store_.get()) {
559    extension_blacklist_store_->Init(
560        ExtensionBlacklistDBFilename(filename_base_),
561        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
562                   base::Unretained(this)));
563  }
564
565  if (side_effect_free_whitelist_store_.get()) {
566    const base::FilePath side_effect_free_whitelist_filename =
567        SideEffectFreeWhitelistDBFilename(filename_base_);
568    const base::FilePath side_effect_free_whitelist_prefix_set_filename =
569        PrefixSetForFilename(side_effect_free_whitelist_filename);
570    side_effect_free_whitelist_store_->Init(
571        side_effect_free_whitelist_filename,
572        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
573                   base::Unretained(this)));
574
575    // Only use the prefix set if database is present and non-empty.
576    if (GetFileSizeOrZero(side_effect_free_whitelist_filename)) {
577      const base::TimeTicks before = base::TimeTicks::Now();
578      side_effect_free_whitelist_prefix_set_ =
579          safe_browsing::PrefixSet::LoadFile(
580              side_effect_free_whitelist_prefix_set_filename);
581      UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
582                          base::TimeTicks::Now() - before);
583      if (!side_effect_free_whitelist_prefix_set_.get())
584        RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
585    }
586  } else {
587    // Delete any files of the side-effect free sidelist that may be around
588    // from when it was previously enabled.
589    SafeBrowsingStoreFile::DeleteStore(
590        SideEffectFreeWhitelistDBFilename(filename_base_));
591    base::DeleteFile(
592        PrefixSetForFilename(SideEffectFreeWhitelistDBFilename(filename_base_)),
593        false);
594  }
595
596  if (ip_blacklist_store_.get()) {
597    ip_blacklist_store_->Init(
598        IpBlacklistDBFilename(filename_base_),
599        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
600                   base::Unretained(this)));
601
602    std::vector<SBAddFullHash> full_hashes;
603    if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
604      LoadIpBlacklist(full_hashes);
605    } else {
606      LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
607    }
608  }
609}
610
611bool SafeBrowsingDatabaseNew::ResetDatabase() {
612  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
613
614  // Delete files on disk.
615  // TODO(shess): Hard to see where one might want to delete without a
616  // reset.  Perhaps inline |Delete()|?
617  if (!Delete())
618    return false;
619
620  // Reset objects in memory.
621  {
622    base::AutoLock locked(lookup_lock_);
623    browse_gethash_cache_.clear();
624    browse_prefix_set_.reset();
625    side_effect_free_whitelist_prefix_set_.reset();
626    ip_blacklist_.clear();
627  }
628  // Wants to acquire the lock itself.
629  WhitelistEverything(&csd_whitelist_);
630  WhitelistEverything(&download_whitelist_);
631  return true;
632}
633
634bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
635    const GURL& url,
636    std::vector<SBPrefix>* prefix_hits,
637    std::vector<SBFullHashResult>* cache_hits) {
638  // Clear the results first.
639  prefix_hits->clear();
640  cache_hits->clear();
641
642  std::vector<SBFullHash> full_hashes;
643  BrowseFullHashesToCheck(url, false, &full_hashes);
644  if (full_hashes.empty())
645    return false;
646
647  return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
648}
649
650bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
651    const std::vector<SBFullHash>& full_hashes,
652    std::vector<SBPrefix>* prefix_hits,
653    std::vector<SBFullHashResult>* cache_hits) {
654  // Used to determine cache expiration.
655  const base::Time now = base::Time::Now();
656
657  // This function is called on the I/O thread, prevent changes to
658  // filter and caches.
659  base::AutoLock locked(lookup_lock_);
660
661  // |browse_prefix_set_| is empty until it is either read from disk, or the
662  // first update populates it.  Bail out without a hit if not yet
663  // available.
664  if (!browse_prefix_set_.get())
665    return false;
666
667  for (size_t i = 0; i < full_hashes.size(); ++i) {
668    if (!GetCachedFullHash(&browse_gethash_cache_,
669                           full_hashes[i],
670                           now,
671                           cache_hits)) {
672      // No valid cached result, check the database.
673      if (browse_prefix_set_->Exists(full_hashes[i]))
674        prefix_hits->push_back(full_hashes[i].prefix);
675    }
676  }
677
678  // Multiple full hashes could share prefix, remove duplicates.
679  std::sort(prefix_hits->begin(), prefix_hits->end());
680  prefix_hits->erase(std::unique(prefix_hits->begin(), prefix_hits->end()),
681                     prefix_hits->end());
682
683  return !prefix_hits->empty() || !cache_hits->empty();
684}
685
686bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
687    const std::vector<GURL>& urls,
688    std::vector<SBPrefix>* prefix_hits) {
689  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
690
691  // Ignore this check when download checking is not enabled.
692  if (!download_store_.get())
693    return false;
694
695  std::vector<SBPrefix> prefixes;
696  GetDownloadUrlPrefixes(urls, &prefixes);
697  return MatchAddPrefixes(download_store_.get(),
698                          safe_browsing_util::BINURL % 2,
699                          prefixes,
700                          prefix_hits);
701}
702
703bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
704  // This method is theoretically thread-safe but we expect all calls to
705  // originate from the IO thread.
706  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
707  std::vector<SBFullHash> full_hashes;
708  BrowseFullHashesToCheck(url, true, &full_hashes);
709  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
710}
711
712bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
713  std::vector<SBFullHash> full_hashes;
714  BrowseFullHashesToCheck(url, true, &full_hashes);
715  return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
716}
717
718bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
719    const std::vector<SBPrefix>& prefixes,
720    std::vector<SBPrefix>* prefix_hits) {
721  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
722  if (!extension_blacklist_store_)
723    return false;
724
725  return MatchAddPrefixes(extension_blacklist_store_.get(),
726                          safe_browsing_util::EXTENSIONBLACKLIST % 2,
727                          prefixes,
728                          prefix_hits);
729}
730
731bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
732    const GURL& url) {
733  std::string host;
734  std::string path;
735  std::string query;
736  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
737  std::string url_to_check = host + path;
738  if (!query.empty())
739    url_to_check +=  "?" + query;
740  SBFullHash full_hash = SBFullHashForString(url_to_check);
741
742  // This function can be called on any thread, so lock against any changes
743  base::AutoLock locked(lookup_lock_);
744
745  // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
746  // from disk, or the first update populates it.  Bail out without a hit if
747  // not yet available.
748  if (!side_effect_free_whitelist_prefix_set_.get())
749    return false;
750
751  return side_effect_free_whitelist_prefix_set_->Exists(full_hash);
752}
753
754bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
755  net::IPAddressNumber ip_number;
756  if (!net::ParseIPLiteralToNumber(ip_address, &ip_number))
757    return false;
758  if (ip_number.size() == net::kIPv4AddressSize)
759    ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
760  if (ip_number.size() != net::kIPv6AddressSize)
761    return false;  // better safe than sorry.
762
763  // This function can be called from any thread.
764  base::AutoLock locked(lookup_lock_);
765  for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
766       it != ip_blacklist_.end();
767       ++it) {
768    const std::string& mask = it->first;
769    DCHECK_EQ(mask.size(), ip_number.size());
770    std::string subnet(net::kIPv6AddressSize, '\0');
771    for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
772      subnet[i] = ip_number[i] & mask[i];
773    }
774    const std::string hash = base::SHA1HashString(subnet);
775    DVLOG(2) << "Lookup Malware IP: "
776             << " ip:" << ip_address
777             << " mask:" << base::HexEncode(mask.data(), mask.size())
778             << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
779             << " hash:" << base::HexEncode(hash.data(), hash.size());
780    if (it->second.count(hash) > 0) {
781      return true;
782    }
783  }
784  return false;
785}
786
787bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
788    const std::string& str) {
789  std::vector<SBFullHash> hashes;
790  hashes.push_back(SBFullHashForString(str));
791  return ContainsWhitelistedHashes(download_whitelist_, hashes);
792}
793
794bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
795    const SBWhitelist& whitelist,
796    const std::vector<SBFullHash>& hashes) {
797  base::AutoLock l(lookup_lock_);
798  if (whitelist.second)
799    return true;
800  for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
801       it != hashes.end(); ++it) {
802    if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
803                           *it, SBFullHashLess)) {
804      return true;
805    }
806  }
807  return false;
808}
809
810// Helper to insert add-chunk entries.
811void SafeBrowsingDatabaseNew::InsertAddChunk(
812    SafeBrowsingStore* store,
813    const safe_browsing_util::ListType list_id,
814    const SBChunkData& chunk_data) {
815  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
816  DCHECK(store);
817
818  // The server can give us a chunk that we already have because
819  // it's part of a range.  Don't add it again.
820  const int chunk_id = chunk_data.ChunkNumber();
821  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
822  if (store->CheckAddChunk(encoded_chunk_id))
823    return;
824
825  store->SetAddChunk(encoded_chunk_id);
826  if (chunk_data.IsPrefix()) {
827    const size_t c = chunk_data.PrefixCount();
828    for (size_t i = 0; i < c; ++i) {
829      STATS_COUNTER("SB.PrefixAdd", 1);
830      store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i));
831    }
832  } else {
833    const size_t c = chunk_data.FullHashCount();
834    for (size_t i = 0; i < c; ++i) {
835      STATS_COUNTER("SB.PrefixAddFull", 1);
836      store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i));
837    }
838  }
839}
840
841// Helper to insert sub-chunk entries.
842void SafeBrowsingDatabaseNew::InsertSubChunk(
843    SafeBrowsingStore* store,
844    const safe_browsing_util::ListType list_id,
845    const SBChunkData& chunk_data) {
846  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
847  DCHECK(store);
848
849  // The server can give us a chunk that we already have because
850  // it's part of a range.  Don't add it again.
851  const int chunk_id = chunk_data.ChunkNumber();
852  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
853  if (store->CheckSubChunk(encoded_chunk_id))
854    return;
855
856  store->SetSubChunk(encoded_chunk_id);
857  if (chunk_data.IsPrefix()) {
858    const size_t c = chunk_data.PrefixCount();
859    for (size_t i = 0; i < c; ++i) {
860      STATS_COUNTER("SB.PrefixSub", 1);
861      const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
862      const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
863      store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id,
864                            chunk_data.PrefixAt(i));
865    }
866  } else {
867    const size_t c = chunk_data.FullHashCount();
868    for (size_t i = 0; i < c; ++i) {
869      STATS_COUNTER("SB.PrefixSubFull", 1);
870      const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
871      const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
872      store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id,
873                          chunk_data.FullHashAt(i));
874    }
875  }
876}
877
878void SafeBrowsingDatabaseNew::InsertChunks(
879    const std::string& list_name,
880    const std::vector<SBChunkData*>& chunks) {
881  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
882
883  if (corruption_detected_ || chunks.empty())
884    return;
885
886  const base::TimeTicks before = base::TimeTicks::Now();
887
888  // TODO(shess): The caller should just pass list_id.
889  const safe_browsing_util::ListType list_id =
890      safe_browsing_util::GetListId(list_name);
891
892  SafeBrowsingStore* store = GetStore(list_id);
893  if (!store) return;
894
895  change_detected_ = true;
896
897  // TODO(shess): I believe that the list is always add or sub.  Can this use
898  // that productively?
899  store->BeginChunk();
900  for (size_t i = 0; i < chunks.size(); ++i) {
901    if (chunks[i]->IsAdd()) {
902      InsertAddChunk(store, list_id, *chunks[i]);
903    } else if (chunks[i]->IsSub()) {
904      InsertSubChunk(store, list_id, *chunks[i]);
905    } else {
906      NOTREACHED();
907    }
908  }
909  store->FinishChunk();
910
911  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
912}
913
914void SafeBrowsingDatabaseNew::DeleteChunks(
915    const std::vector<SBChunkDelete>& chunk_deletes) {
916  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
917
918  if (corruption_detected_ || chunk_deletes.empty())
919    return;
920
921  const std::string& list_name = chunk_deletes.front().list_name;
922  const safe_browsing_util::ListType list_id =
923      safe_browsing_util::GetListId(list_name);
924
925  SafeBrowsingStore* store = GetStore(list_id);
926  if (!store) return;
927
928  change_detected_ = true;
929
930  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
931    std::vector<int> chunk_numbers;
932    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
933    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
934      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
935      if (chunk_deletes[i].is_sub_del)
936        store->DeleteSubChunk(encoded_chunk_id);
937      else
938        store->DeleteAddChunk(encoded_chunk_id);
939    }
940  }
941}
942
943void SafeBrowsingDatabaseNew::CacheHashResults(
944    const std::vector<SBPrefix>& prefixes,
945    const std::vector<SBFullHashResult>& full_hits,
946    const base::TimeDelta& cache_lifetime) {
947  const base::Time expire_after = base::Time::Now() + cache_lifetime;
948
949  // This is called on the I/O thread, lock against updates.
950  base::AutoLock locked(lookup_lock_);
951
952  // Create or reset all cached results for these prefixes.
953  for (size_t i = 0; i < prefixes.size(); ++i) {
954    browse_gethash_cache_[prefixes[i]] = SBCachedFullHashResult(expire_after);
955  }
956
957  // Insert any fullhash hits. Note that there may be one, multiple, or no
958  // fullhashes for any given entry in |prefixes|.
959  for (size_t i = 0; i < full_hits.size(); ++i) {
960    const SBPrefix prefix = full_hits[i].hash.prefix;
961    browse_gethash_cache_[prefix].full_hashes.push_back(full_hits[i]);
962  }
963}
964
965bool SafeBrowsingDatabaseNew::UpdateStarted(
966    std::vector<SBListChunkRanges>* lists) {
967  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
968  DCHECK(lists);
969
970  // If |BeginUpdate()| fails, reset the database.
971  if (!browse_store_->BeginUpdate()) {
972    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
973    HandleCorruptDatabase();
974    return false;
975  }
976
977  if (download_store_.get() && !download_store_->BeginUpdate()) {
978    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
979    HandleCorruptDatabase();
980    return false;
981  }
982
983  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
984    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
985    HandleCorruptDatabase();
986    return false;
987  }
988
989  if (download_whitelist_store_.get() &&
990      !download_whitelist_store_->BeginUpdate()) {
991    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
992    HandleCorruptDatabase();
993    return false;
994  }
995
996  if (extension_blacklist_store_ &&
997      !extension_blacklist_store_->BeginUpdate()) {
998    RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
999    HandleCorruptDatabase();
1000    return false;
1001  }
1002
1003  if (side_effect_free_whitelist_store_ &&
1004      !side_effect_free_whitelist_store_->BeginUpdate()) {
1005    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1006    HandleCorruptDatabase();
1007    return false;
1008  }
1009
1010  if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1011    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1012    HandleCorruptDatabase();
1013    return false;
1014  }
1015
1016  {
1017    base::AutoLock locked(lookup_lock_);
1018    // Cached fullhash results must be cleared on every database update (whether
1019    // successful or not.)
1020    browse_gethash_cache_.clear();
1021  }
1022
1023  UpdateChunkRangesForLists(browse_store_.get(),
1024                            safe_browsing_util::kMalwareList,
1025                            safe_browsing_util::kPhishingList,
1026                            lists);
1027
1028  // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1029  // deprecated.  Code to delete the list from the store shows ~15k hits/day as
1030  // of Feb 2014, so it has been removed.  Everything _should_ be resilient to
1031  // extra data of that sort.
1032  UpdateChunkRangesForList(download_store_.get(),
1033                           safe_browsing_util::kBinUrlList, lists);
1034
1035  UpdateChunkRangesForList(csd_whitelist_store_.get(),
1036                           safe_browsing_util::kCsdWhiteList, lists);
1037
1038  UpdateChunkRangesForList(download_whitelist_store_.get(),
1039                           safe_browsing_util::kDownloadWhiteList, lists);
1040
1041  UpdateChunkRangesForList(extension_blacklist_store_.get(),
1042                           safe_browsing_util::kExtensionBlacklist, lists);
1043
1044  UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1045                           safe_browsing_util::kSideEffectFreeWhitelist, lists);
1046
1047  UpdateChunkRangesForList(ip_blacklist_store_.get(),
1048                           safe_browsing_util::kIPBlacklist, lists);
1049
1050  corruption_detected_ = false;
1051  change_detected_ = false;
1052  return true;
1053}
1054
1055void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1056  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1057
1058  // The update may have failed due to corrupt storage (for instance,
1059  // an excessive number of invalid add_chunks and sub_chunks).
1060  // Double-check that the databases are valid.
1061  // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1062  // sections would allow throwing a corruption error in
1063  // UpdateStarted().
1064  if (!update_succeeded) {
1065    if (!browse_store_->CheckValidity())
1066      DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1067
1068    if (download_store_.get() && !download_store_->CheckValidity())
1069      DLOG(ERROR) << "Safe-browsing download database corrupt.";
1070
1071    if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1072      DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1073
1074    if (download_whitelist_store_.get() &&
1075        !download_whitelist_store_->CheckValidity()) {
1076      DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1077    }
1078
1079    if (extension_blacklist_store_ &&
1080        !extension_blacklist_store_->CheckValidity()) {
1081      DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1082    }
1083
1084    if (side_effect_free_whitelist_store_ &&
1085        !side_effect_free_whitelist_store_->CheckValidity()) {
1086      DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1087                  << "corrupt.";
1088    }
1089
1090    if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1091      DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1092    }
1093  }
1094
1095  if (corruption_detected_)
1096    return;
1097
1098  // Unroll the transaction if there was a protocol error or if the
1099  // transaction was empty.  This will leave the prefix set, the
1100  // pending hashes, and the prefix miss cache in place.
1101  if (!update_succeeded || !change_detected_) {
1102    // Track empty updates to answer questions at http://crbug.com/72216 .
1103    if (update_succeeded && !change_detected_)
1104      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1105    browse_store_->CancelUpdate();
1106    if (download_store_.get())
1107      download_store_->CancelUpdate();
1108    if (csd_whitelist_store_.get())
1109      csd_whitelist_store_->CancelUpdate();
1110    if (download_whitelist_store_.get())
1111      download_whitelist_store_->CancelUpdate();
1112    if (extension_blacklist_store_)
1113      extension_blacklist_store_->CancelUpdate();
1114    if (side_effect_free_whitelist_store_)
1115      side_effect_free_whitelist_store_->CancelUpdate();
1116    if (ip_blacklist_store_)
1117      ip_blacklist_store_->CancelUpdate();
1118    return;
1119  }
1120
1121  if (download_store_) {
1122    int64 size_bytes = UpdateHashPrefixStore(
1123        DownloadDBFilename(filename_base_),
1124        download_store_.get(),
1125        FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1126    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1127                         static_cast<int>(size_bytes / 1024));
1128  }
1129
1130  UpdateBrowseStore();
1131  UpdateWhitelistStore(CsdWhitelistDBFilename(filename_base_),
1132                       csd_whitelist_store_.get(),
1133                       &csd_whitelist_);
1134  UpdateWhitelistStore(DownloadWhitelistDBFilename(filename_base_),
1135                       download_whitelist_store_.get(),
1136                       &download_whitelist_);
1137
1138  if (extension_blacklist_store_) {
1139    int64 size_bytes = UpdateHashPrefixStore(
1140        ExtensionBlacklistDBFilename(filename_base_),
1141        extension_blacklist_store_.get(),
1142        FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1143    UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1144                         static_cast<int>(size_bytes / 1024));
1145  }
1146
1147  if (side_effect_free_whitelist_store_)
1148    UpdateSideEffectFreeWhitelistStore();
1149
1150  if (ip_blacklist_store_)
1151    UpdateIpBlacklistStore();
1152}
1153
1154void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1155    const base::FilePath& store_filename,
1156    SafeBrowsingStore* store,
1157    SBWhitelist* whitelist) {
1158  if (!store)
1159    return;
1160
1161  // Note: |builder| will not be empty.  The current data store implementation
1162  // stores all full-length hashes as both full and prefix hashes.
1163  safe_browsing::PrefixSetBuilder builder;
1164  std::vector<SBAddFullHash> full_hashes;
1165  if (!store->FinishUpdate(&builder, &full_hashes)) {
1166    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1167    WhitelistEverything(whitelist);
1168    return;
1169  }
1170
1171#if defined(OS_MACOSX)
1172  base::mac::SetFileBackupExclusion(store_filename);
1173#endif
1174
1175  LoadWhitelist(full_hashes, whitelist);
1176}
1177
1178int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1179    const base::FilePath& store_filename,
1180    SafeBrowsingStore* store,
1181    FailureType failure_type) {
1182  // These results are not used after this call. Simply ignore the
1183  // returned value after FinishUpdate(...).
1184  safe_browsing::PrefixSetBuilder builder;
1185  std::vector<SBAddFullHash> add_full_hashes_result;
1186
1187  if (!store->FinishUpdate(&builder, &add_full_hashes_result))
1188    RecordFailure(failure_type);
1189
1190#if defined(OS_MACOSX)
1191  base::mac::SetFileBackupExclusion(store_filename);
1192#endif
1193
1194  return GetFileSizeOrZero(store_filename);
1195}
1196
1197void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1198  // Measure the amount of IO during the filter build.
1199  base::IoCounters io_before, io_after;
1200  base::ProcessHandle handle = base::Process::Current().handle();
1201  scoped_ptr<base::ProcessMetrics> metric(
1202#if !defined(OS_MACOSX)
1203      base::ProcessMetrics::CreateProcessMetrics(handle)
1204#else
1205      // Getting stats only for the current process is enough, so NULL is fine.
1206      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1207#endif
1208  );
1209
1210  // IoCounters are currently not supported on Mac, and may not be
1211  // available for Linux, so we check the result and only show IO
1212  // stats if they are available.
1213  const bool got_counters = metric->GetIOCounters(&io_before);
1214
1215  const base::TimeTicks before = base::TimeTicks::Now();
1216
1217  // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1218  // fly?  Other clients use the SBAddFullHash vector, but AFAICT they only use
1219  // the SBFullHash portion.  It would need an accessor on PrefixSet.
1220  safe_browsing::PrefixSetBuilder builder;
1221  std::vector<SBAddFullHash> add_full_hashes;
1222  if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1223    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1224    return;
1225  }
1226
1227  std::vector<SBFullHash> full_hash_results;
1228  for (size_t i = 0; i < add_full_hashes.size(); ++i) {
1229    full_hash_results.push_back(add_full_hashes[i].full_hash);
1230  }
1231
1232  scoped_ptr<safe_browsing::PrefixSet>
1233      prefix_set(builder.GetPrefixSet(full_hash_results));
1234
1235  // Swap in the newly built filter.
1236  {
1237    base::AutoLock locked(lookup_lock_);
1238    browse_prefix_set_.swap(prefix_set);
1239  }
1240
1241  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1242
1243  // Persist the prefix set to disk.  Since only this thread changes
1244  // |browse_prefix_set_|, there is no need to lock.
1245  WritePrefixSet();
1246
1247  // Gather statistics.
1248  if (got_counters && metric->GetIOCounters(&io_after)) {
1249    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1250                         static_cast<int>(io_after.ReadTransferCount -
1251                                          io_before.ReadTransferCount) / 1024);
1252    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1253                         static_cast<int>(io_after.WriteTransferCount -
1254                                          io_before.WriteTransferCount) / 1024);
1255    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1256                         static_cast<int>(io_after.ReadOperationCount -
1257                                          io_before.ReadOperationCount));
1258    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1259                         static_cast<int>(io_after.WriteOperationCount -
1260                                          io_before.WriteOperationCount));
1261  }
1262
1263  const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1264  const int64 file_size = GetFileSizeOrZero(browse_filename);
1265  UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1266                       static_cast<int>(file_size / 1024));
1267
1268#if defined(OS_MACOSX)
1269  base::mac::SetFileBackupExclusion(browse_filename);
1270#endif
1271}
1272
1273void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1274  safe_browsing::PrefixSetBuilder builder;
1275  std::vector<SBAddFullHash> add_full_hashes_result;
1276
1277  if (!side_effect_free_whitelist_store_->FinishUpdate(
1278          &builder, &add_full_hashes_result)) {
1279    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1280    return;
1281  }
1282  scoped_ptr<safe_browsing::PrefixSet>
1283      prefix_set(builder.GetPrefixSetNoHashes());
1284
1285  // Swap in the newly built prefix set.
1286  {
1287    base::AutoLock locked(lookup_lock_);
1288    side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1289  }
1290
1291  const base::FilePath side_effect_free_whitelist_filename =
1292      SideEffectFreeWhitelistDBFilename(filename_base_);
1293  const base::FilePath side_effect_free_whitelist_prefix_set_filename =
1294      PrefixSetForFilename(side_effect_free_whitelist_filename);
1295  const base::TimeTicks before = base::TimeTicks::Now();
1296  const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1297      side_effect_free_whitelist_prefix_set_filename);
1298  UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1299                      base::TimeTicks::Now() - before);
1300
1301  if (!write_ok)
1302    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1303
1304  // Gather statistics.
1305  int64 file_size = GetFileSizeOrZero(
1306      side_effect_free_whitelist_prefix_set_filename);
1307  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1308                       static_cast<int>(file_size / 1024));
1309  file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename);
1310  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1311                       static_cast<int>(file_size / 1024));
1312
1313#if defined(OS_MACOSX)
1314  base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename);
1315  base::mac::SetFileBackupExclusion(
1316      side_effect_free_whitelist_prefix_set_filename);
1317#endif
1318}
1319
1320void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1321  // Note: prefixes will not be empty.  The current data store implementation
1322  // stores all full-length hashes as both full and prefix hashes.
1323  safe_browsing::PrefixSetBuilder builder;
1324  std::vector<SBAddFullHash> full_hashes;
1325  if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1326    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1327    LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
1328    return;
1329  }
1330
1331#if defined(OS_MACOSX)
1332  base::mac::SetFileBackupExclusion(IpBlacklistDBFilename(filename_base_));
1333#endif
1334
1335  LoadIpBlacklist(full_hashes);
1336}
1337
1338void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1339  // Reset the database after the current task has unwound (but only
1340  // reset once within the scope of a given task).
1341  if (!reset_factory_.HasWeakPtrs()) {
1342    RecordFailure(FAILURE_DATABASE_CORRUPT);
1343    base::MessageLoop::current()->PostTask(FROM_HERE,
1344        base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1345                   reset_factory_.GetWeakPtr()));
1346  }
1347}
1348
// Task posted by HandleCorruptDatabase(): records the failure, marks the
// database as corrupt, and resets it.
void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
  // InsertChunks(), DeleteChunks() and UpdateFinished() bail out early while
  // this flag is set; UpdateStarted() clears it again.
  corruption_detected_ = true;  // Stop updating the database.
  ResetDatabase();

  // NOTE(shess): ResetDatabase() should remove the corruption, so this should
  // only happen once.  If you are here because you are hitting this after a
  // restart, then I would be very interested in working with you to figure out
  // what is happening, since it may affect real users.
  DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
}
1360
1361// TODO(shess): I'm not clear why this code doesn't have any
1362// real error-handling.
1363void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1364  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1365  DCHECK(!filename_base_.empty());
1366
1367  const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1368  const base::FilePath browse_prefix_set_filename =
1369      PrefixSetForFilename(browse_filename);
1370
1371  // Only use the prefix set if database is present and non-empty.
1372  if (!GetFileSizeOrZero(browse_filename))
1373    return;
1374
1375  // Cleanup any stale bloom filter (no longer used).
1376  // TODO(shess): Track existence to drive removal of this code?
1377  const base::FilePath bloom_filter_filename =
1378      BloomFilterForFilename(browse_filename);
1379  base::DeleteFile(bloom_filter_filename, false);
1380
1381  const base::TimeTicks before = base::TimeTicks::Now();
1382  browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile(
1383      browse_prefix_set_filename);
1384  UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1385
1386  if (!browse_prefix_set_.get())
1387    RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1388}
1389
1390bool SafeBrowsingDatabaseNew::Delete() {
1391  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1392  DCHECK(!filename_base_.empty());
1393
1394  // TODO(shess): This is a mess.  SafeBrowsingFileStore::Delete() closes the
1395  // store before calling DeleteStore().  DeleteStore() deletes transient files
1396  // in addition to the main file.  Probably all of these should be converted to
1397  // a helper which calls Delete() if the store exists, else DeleteStore() on
1398  // the generated filename.
1399
1400  // TODO(shess): Determine if the histograms are useful in any way.  I cannot
1401  // recall any action taken as a result of their values, in which case it might
1402  // make more sense to histogram an overall thumbs-up/-down and just dig deeper
1403  // if something looks wrong.
1404
1405  const bool r1 = browse_store_->Delete();
1406  if (!r1)
1407    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1408
1409  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1410  if (!r2)
1411    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1412
1413  const bool r3 = csd_whitelist_store_.get() ?
1414      csd_whitelist_store_->Delete() : true;
1415  if (!r3)
1416    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1417
1418  const bool r4 = download_whitelist_store_.get() ?
1419      download_whitelist_store_->Delete() : true;
1420  if (!r4)
1421    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1422
1423  const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1424  const base::FilePath bloom_filter_filename =
1425      BloomFilterForFilename(browse_filename);
1426  const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1427  if (!r5)
1428    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1429
1430  const base::FilePath browse_prefix_set_filename =
1431      PrefixSetForFilename(browse_filename);
1432  const bool r6 = base::DeleteFile(browse_prefix_set_filename, false);
1433  if (!r6)
1434    RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1435
1436  const base::FilePath extension_blacklist_filename =
1437      ExtensionBlacklistDBFilename(filename_base_);
1438  const bool r7 = base::DeleteFile(extension_blacklist_filename, false);
1439  if (!r7)
1440    RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1441
1442  const base::FilePath side_effect_free_whitelist_filename =
1443      SideEffectFreeWhitelistDBFilename(filename_base_);
1444  const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename,
1445                                   false);
1446  if (!r8)
1447    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1448
1449  const base::FilePath side_effect_free_whitelist_prefix_set_filename =
1450      PrefixSetForFilename(side_effect_free_whitelist_filename);
1451  const bool r9 = base::DeleteFile(
1452      side_effect_free_whitelist_prefix_set_filename,
1453      false);
1454  if (!r9)
1455    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1456
1457  const bool r10 = base::DeleteFile(IpBlacklistDBFilename(filename_base_),
1458                                    false);
1459  if (!r10)
1460    RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1461
1462  return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
1463}
1464
1465void SafeBrowsingDatabaseNew::WritePrefixSet() {
1466  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1467
1468  if (!browse_prefix_set_.get())
1469    return;
1470
1471  const base::FilePath browse_filename = BrowseDBFilename(filename_base_);
1472  const base::FilePath browse_prefix_set_filename =
1473      PrefixSetForFilename(browse_filename);
1474
1475  const base::TimeTicks before = base::TimeTicks::Now();
1476  const bool write_ok = browse_prefix_set_->WriteFile(
1477      browse_prefix_set_filename);
1478  UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1479
1480  const int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename);
1481  UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1482                       static_cast<int>(file_size / 1024));
1483
1484  if (!write_ok)
1485    RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1486
1487#if defined(OS_MACOSX)
1488  base::mac::SetFileBackupExclusion(browse_prefix_set_filename);
1489#endif
1490}
1491
1492void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1493  base::AutoLock locked(lookup_lock_);
1494  whitelist->second = true;
1495  whitelist->first.clear();
1496}
1497
1498void SafeBrowsingDatabaseNew::LoadWhitelist(
1499    const std::vector<SBAddFullHash>& full_hashes,
1500    SBWhitelist* whitelist) {
1501  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1502  if (full_hashes.size() > kMaxWhitelistSize) {
1503    WhitelistEverything(whitelist);
1504    return;
1505  }
1506
1507  std::vector<SBFullHash> new_whitelist;
1508  new_whitelist.reserve(full_hashes.size());
1509  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1510       it != full_hashes.end(); ++it) {
1511    new_whitelist.push_back(it->full_hash);
1512  }
1513  std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1514
1515  SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1516  if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1517                         kill_switch, SBFullHashLess)) {
1518    // The kill switch is whitelisted hence we whitelist all URLs.
1519    WhitelistEverything(whitelist);
1520  } else {
1521    base::AutoLock locked(lookup_lock_);
1522    whitelist->second = false;
1523    whitelist->first.swap(new_whitelist);
1524  }
1525}
1526
1527void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1528    const std::vector<SBAddFullHash>& full_hashes) {
1529  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1530  IPBlacklist new_blacklist;
1531  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1532       it != full_hashes.end();
1533       ++it) {
1534    const char* full_hash = it->full_hash.full_hash;
1535    DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1536    // The format of the IP blacklist is:
1537    // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1538    std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1539    size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1540    if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1541      RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1542      new_blacklist.clear();  // Load empty blacklist.
1543      break;
1544    }
1545
1546    // We precompute the mask for the given subnet size to speed up lookups.
1547    // Basically we need to create a 16B long string which has the highest
1548    // |size| bits sets to one.
1549    std::string mask(net::kIPv6AddressSize, '\0');
1550    mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1551    if ((prefix_size % 8) != 0) {
1552      mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1553    }
1554    DVLOG(2) << "Inserting malicious IP: "
1555             << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1556             << " mask:" << base::HexEncode(mask.data(), mask.size())
1557             << " prefix_size:" << prefix_size
1558             << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1559                                                 hashed_ip_prefix.size());
1560    new_blacklist[mask].insert(hashed_ip_prefix);
1561  }
1562
1563  base::AutoLock locked(lookup_lock_);
1564  ip_blacklist_.swap(new_blacklist);
1565}
1566
1567bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1568  SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1569  std::vector<SBFullHash> full_hashes;
1570  full_hashes.push_back(malware_kill_switch);
1571  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1572}
1573
1574bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() {
1575  return csd_whitelist_.second;
1576}
1577