safe_browsing_database.cc revision f8ee788a64d60abd8f2d742a5fdedde054ecd910
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7#include <algorithm>
8#include <iterator>
9
10#include "base/bind.h"
11#include "base/file_util.h"
12#include "base/message_loop/message_loop.h"
13#include "base/metrics/histogram.h"
14#include "base/metrics/stats_counters.h"
15#include "base/process/process.h"
16#include "base/process/process_metrics.h"
17#include "base/sha1.h"
18#include "base/strings/string_number_conversions.h"
19#include "base/strings/stringprintf.h"
20#include "base/time/time.h"
21#include "chrome/browser/safe_browsing/prefix_set.h"
22#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23#include "content/public/browser/browser_thread.h"
24#include "crypto/sha2.h"
25#include "net/base/net_util.h"
26#include "url/gurl.h"
27
28#if defined(OS_MACOSX)
29#include "base/mac/mac_util.h"
30#endif
31
32using content::BrowserThread;
33
34namespace {
35
// The constants below are suffixes that get appended to a database filename
// to name the individual on-disk files.  Each one starts with a space.

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for the csd malware IP blacklist store.
const base::FilePath::CharType kIPBlacklistDBFile[] =
    FILE_PATH_LITERAL(" IP Blacklist");

// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit.  If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
68
// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// NOTE(review): presumably the largest and smallest IP prefix sizes accepted
// for malware IP blacklist entries (128 would match the width of an IPv6
// address in bits).  The code that uses these is not visible here -- confirm.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
87
// Storage packs a chunk number and the parity of its list id into a single
// int, the |encoded_chunk_id|: the chunk number sits in the upper bits and
// the list-id bit occupies the lowest bit.  The helpers below pack and
// unpack that representation.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.

// Returns the list-id bit (0 or 1) held in the lowest bit of
// |encoded_chunk_id|.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Recovers the chunk number from |encoded_chunk_id| by shifting out the
// lowest (list-id) bit.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
100int EncodeChunkId(const int chunk, const int list_id) {
101  DCHECK_NE(list_id, safe_browsing_util::INVALID);
102  return chunk << 1 | list_id % 2;
103}
104
105// Generate the set of full hashes to check for |url|.  If
106// |include_whitelist_hashes| is true we will generate additional path-prefixes
107// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
108// whitelist it should also match /foo/bar which is not the case for all the
109// other lists.  We'll also always add a pattern for the empty path.
110// TODO(shess): This function is almost the same as
111// |CompareFullHashes()| in safe_browsing_util.cc, except that code
112// does an early exit on match.  Since match should be the infrequent
113// case (phishing or malware found), consider combining this function
114// with that one.
115void BrowseFullHashesToCheck(const GURL& url,
116                             bool include_whitelist_hashes,
117                             std::vector<SBFullHash>* full_hashes) {
118  std::vector<std::string> hosts;
119  if (url.HostIsIPAddress()) {
120    hosts.push_back(url.host());
121  } else {
122    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
123  }
124
125  std::vector<std::string> paths;
126  safe_browsing_util::GeneratePathsToCheck(url, &paths);
127
128  for (size_t i = 0; i < hosts.size(); ++i) {
129    for (size_t j = 0; j < paths.size(); ++j) {
130      const std::string& path = paths[j];
131      full_hashes->push_back(SBFullHashForString(hosts[i] + path));
132
133      // We may have /foo as path-prefix in the whitelist which should
134      // also match with /foo/bar and /foo?bar.  Hence, for every path
135      // that ends in '/' we also add the path without the slash.
136      if (include_whitelist_hashes &&
137          path.size() > 1 &&
138          path[path.size() - 1] == '/') {
139        full_hashes->push_back(
140            SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
141      }
142    }
143  }
144}
145
146// Get the prefixes matching the download |urls|.
147void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
148                            std::vector<SBPrefix>* prefixes) {
149  std::vector<SBFullHash> full_hashes;
150  for (size_t i = 0; i < urls.size(); ++i)
151    BrowseFullHashesToCheck(urls[i], false, &full_hashes);
152
153  for (size_t i = 0; i < full_hashes.size(); ++i)
154    prefixes->push_back(full_hashes[i].prefix);
155}
156
157// Helper function to compare addprefixes in |store| with |prefixes|.
158// The |list_bit| indicates which list (url or hash) to compare.
159//
160// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
161// the actual matching prefixes.
162bool MatchAddPrefixes(SafeBrowsingStore* store,
163                      int list_bit,
164                      const std::vector<SBPrefix>& prefixes,
165                      std::vector<SBPrefix>* prefix_hits) {
166  prefix_hits->clear();
167  bool found_match = false;
168
169  SBAddPrefixes add_prefixes;
170  store->GetAddPrefixes(&add_prefixes);
171  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
172       iter != add_prefixes.end(); ++iter) {
173    for (size_t j = 0; j < prefixes.size(); ++j) {
174      const SBPrefix& prefix = prefixes[j];
175      if (prefix == iter->prefix &&
176          GetListIdBit(iter->chunk_id) == list_bit) {
177        prefix_hits->push_back(prefix);
178        found_match = true;
179      }
180    }
181  }
182  return found_match;
183}
184
185// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
186// add them to |full_hits| if not expired.  "Not expired" is when
187// either |last_update| was recent enough, or the item has been
188// received recently enough.  Expired items are not deleted because a
189// future update may make them acceptable again.
190//
191// For efficiency reasons the code walks |prefix_hits| and
192// |full_hashes| in parallel, so they must be sorted by prefix.
193void GetCachedFullHashesForBrowse(
194    const std::vector<SBPrefix>& prefix_hits,
195    const std::vector<SBFullHashCached>& full_hashes,
196    std::vector<SBFullHashResult>* full_hits) {
197  const base::Time now = base::Time::Now();
198
199  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
200  std::vector<SBFullHashCached>::const_iterator hiter = full_hashes.begin();
201
202  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
203    if (*piter < hiter->hash.prefix) {
204      ++piter;
205    } else if (hiter->hash.prefix < *piter) {
206      ++hiter;
207    } else {
208      if (now <= hiter->expire_after) {
209        SBFullHashResult result;
210        result.list_id = hiter->list_id;
211        result.hash = hiter->hash;
212        full_hits->push_back(result);
213      }
214
215      // Only increment |hiter|, |piter| might have multiple hits.
216      ++hiter;
217    }
218  }
219}
220
221// This function generates a chunk range string for |chunks|. It
222// outputs one chunk range string per list and writes it to the
223// |list_ranges| vector.  We expect |list_ranges| to already be of the
224// right size.  E.g., if |chunks| contains chunks with two different
225// list ids then |list_ranges| must contain two elements.
226void GetChunkRanges(const std::vector<int>& chunks,
227                    std::vector<std::string>* list_ranges) {
228  // Since there are 2 possible list ids, there must be exactly two
229  // list ranges.  Even if the chunk data should only contain one
230  // line, this code has to somehow handle corruption.
231  DCHECK_EQ(2U, list_ranges->size());
232
233  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
234  for (std::vector<int>::const_iterator iter = chunks.begin();
235       iter != chunks.end(); ++iter) {
236    int mod_list_id = GetListIdBit(*iter);
237    DCHECK_GE(mod_list_id, 0);
238    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
239    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
240  }
241  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
242    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
243  }
244}
245
246// Helper function to create chunk range lists for Browse related
247// lists.
248void UpdateChunkRanges(SafeBrowsingStore* store,
249                       const std::vector<std::string>& listnames,
250                       std::vector<SBListChunkRanges>* lists) {
251  if (!store)
252    return;
253
254  DCHECK_GT(listnames.size(), 0U);
255  DCHECK_LE(listnames.size(), 2U);
256  std::vector<int> add_chunks;
257  std::vector<int> sub_chunks;
258  store->GetAddChunks(&add_chunks);
259  store->GetSubChunks(&sub_chunks);
260
261  // Always decode 2 ranges, even if only the first one is expected.
262  // The loop below will only load as many into |lists| as |listnames|
263  // indicates.
264  std::vector<std::string> adds(2);
265  std::vector<std::string> subs(2);
266  GetChunkRanges(add_chunks, &adds);
267  GetChunkRanges(sub_chunks, &subs);
268
269  for (size_t i = 0; i < listnames.size(); ++i) {
270    const std::string& listname = listnames[i];
271    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
272              static_cast<int>(i % 2));
273    DCHECK_NE(safe_browsing_util::GetListId(listname),
274              safe_browsing_util::INVALID);
275    lists->push_back(SBListChunkRanges(listname));
276    lists->back().adds.swap(adds[i]);
277    lists->back().subs.swap(subs[i]);
278  }
279}
280
281void UpdateChunkRangesForLists(SafeBrowsingStore* store,
282                               const std::string& listname0,
283                               const std::string& listname1,
284                               std::vector<SBListChunkRanges>* lists) {
285  std::vector<std::string> listnames;
286  listnames.push_back(listname0);
287  listnames.push_back(listname1);
288  UpdateChunkRanges(store, listnames, lists);
289}
290
291void UpdateChunkRangesForList(SafeBrowsingStore* store,
292                              const std::string& listname,
293                              std::vector<SBListChunkRanges>* lists) {
294  UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
295}
296
297// Order |SBFullHashCached| items on the prefix part.
298bool SBFullHashCachedPrefixLess(const SBFullHashCached& a,
299                                const SBFullHashCached& b) {
300  return a.hash.prefix < b.hash.prefix;
301}
302
303// This code always checks for non-zero file size.  This helper makes
304// that less verbose.
305int64 GetFileSizeOrZero(const base::FilePath& file_path) {
306  int64 size_64;
307  if (!base::GetFileSize(file_path, &size_64))
308    return 0;
309  return size_64;
310}
311
312}  // namespace
313
314// The default SafeBrowsingDatabaseFactory.
315class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
316 public:
317  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
318      bool enable_download_protection,
319      bool enable_client_side_whitelist,
320      bool enable_download_whitelist,
321      bool enable_extension_blacklist,
322      bool enable_side_effect_free_whitelist,
323      bool enable_ip_blacklist) OVERRIDE {
324    return new SafeBrowsingDatabaseNew(
325        new SafeBrowsingStoreFile,
326        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
327        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
328        enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
329        enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
330        enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
331        enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
332  }
333
334  SafeBrowsingDatabaseFactoryImpl() { }
335
336 private:
337  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
338};
339
// static
// Factory consulted by SafeBrowsingDatabase::Create().  It starts out NULL;
// Create() installs a SafeBrowsingDatabaseFactoryImpl if it is still NULL
// when called.
SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
342
343// Factory method, non-thread safe. Caller has to make sure this s called
344// on SafeBrowsing Thread.
345// TODO(shess): There's no need for a factory any longer.  Convert
346// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
347// callers just construct things directly.
348SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
349    bool enable_download_protection,
350    bool enable_client_side_whitelist,
351    bool enable_download_whitelist,
352    bool enable_extension_blacklist,
353    bool enable_side_effect_free_whitelist,
354    bool enable_ip_blacklist) {
355  if (!factory_)
356    factory_ = new SafeBrowsingDatabaseFactoryImpl();
357  return factory_->CreateSafeBrowsingDatabase(
358      enable_download_protection,
359      enable_client_side_whitelist,
360      enable_download_whitelist,
361      enable_extension_blacklist,
362      enable_side_effect_free_whitelist,
363      enable_ip_blacklist);
364}
365
366SafeBrowsingDatabase::~SafeBrowsingDatabase() {
367}
368
369// static
370base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
371    const base::FilePath& db_base_filename) {
372  return base::FilePath(db_base_filename.value() + kBrowseDBFile);
373}
374
375// static
376base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
377    const base::FilePath& db_base_filename) {
378  return base::FilePath(db_base_filename.value() + kDownloadDBFile);
379}
380
381// static
382base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
383    const base::FilePath& db_filename) {
384  return base::FilePath(db_filename.value() + kBloomFilterFile);
385}
386
387// static
388base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
389    const base::FilePath& db_filename) {
390  return base::FilePath(db_filename.value() + kPrefixSetFile);
391}
392
393// static
394base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
395    const base::FilePath& db_filename) {
396  return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
397}
398
399// static
400base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
401    const base::FilePath& db_filename) {
402  return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
403}
404
405// static
406base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
407    const base::FilePath& db_filename) {
408  return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
409}
410
411// static
412base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
413    const base::FilePath& db_filename) {
414  return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
415}
416
417// static
418base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
419    const base::FilePath& db_filename) {
420  return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
421}
422
423SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
424  if (list_id == safe_browsing_util::PHISH ||
425      list_id == safe_browsing_util::MALWARE) {
426    return browse_store_.get();
427  } else if (list_id == safe_browsing_util::BINURL) {
428    return download_store_.get();
429  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
430    return csd_whitelist_store_.get();
431  } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
432    return download_whitelist_store_.get();
433  } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
434    return extension_blacklist_store_.get();
435  } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
436    return side_effect_free_whitelist_store_.get();
437  } else if (list_id == safe_browsing_util::IPBLACKLIST) {
438    return ip_blacklist_store_.get();
439  }
440  return NULL;
441}
442
443// static
444void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
445  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
446                            FAILURE_DATABASE_MAX);
447}
448
449SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
450    : creation_loop_(base::MessageLoop::current()),
451      browse_store_(new SafeBrowsingStoreFile),
452      reset_factory_(this),
453      corruption_detected_(false),
454      change_detected_(false) {
455  DCHECK(browse_store_.get());
456  DCHECK(!download_store_.get());
457  DCHECK(!csd_whitelist_store_.get());
458  DCHECK(!download_whitelist_store_.get());
459  DCHECK(!extension_blacklist_store_.get());
460  DCHECK(!side_effect_free_whitelist_store_.get());
461  DCHECK(!ip_blacklist_store_.get());
462}
463
464SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
465    SafeBrowsingStore* browse_store,
466    SafeBrowsingStore* download_store,
467    SafeBrowsingStore* csd_whitelist_store,
468    SafeBrowsingStore* download_whitelist_store,
469    SafeBrowsingStore* extension_blacklist_store,
470    SafeBrowsingStore* side_effect_free_whitelist_store,
471    SafeBrowsingStore* ip_blacklist_store)
472    : creation_loop_(base::MessageLoop::current()),
473      browse_store_(browse_store),
474      download_store_(download_store),
475      csd_whitelist_store_(csd_whitelist_store),
476      download_whitelist_store_(download_whitelist_store),
477      extension_blacklist_store_(extension_blacklist_store),
478      side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
479      ip_blacklist_store_(ip_blacklist_store),
480      reset_factory_(this),
481      corruption_detected_(false) {
482  DCHECK(browse_store_.get());
483}
484
485SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
486  // The DCHECK is disabled due to crbug.com/338486 .
487  // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
488}
489
490void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
491  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
492  // Ensure we haven't been run before.
493  DCHECK(browse_filename_.empty());
494  DCHECK(download_filename_.empty());
495  DCHECK(csd_whitelist_filename_.empty());
496  DCHECK(download_whitelist_filename_.empty());
497  DCHECK(extension_blacklist_filename_.empty());
498  DCHECK(side_effect_free_whitelist_filename_.empty());
499  DCHECK(ip_blacklist_filename_.empty());
500
501  browse_filename_ = BrowseDBFilename(filename_base);
502  browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
503
504  browse_store_->Init(
505      browse_filename_,
506      base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
507                 base::Unretained(this)));
508
509  {
510    // NOTE: There is no need to grab the lock in this function, since
511    // until it returns, there are no pointers to this class on other
512    // threads.  Then again, that means there is no possibility of
513    // contention on the lock...
514    base::AutoLock locked(lookup_lock_);
515    cached_browse_hashes_.clear();
516    LoadPrefixSet();
517  }
518
519  if (download_store_.get()) {
520    download_filename_ = DownloadDBFilename(filename_base);
521    download_store_->Init(
522        download_filename_,
523        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
524                   base::Unretained(this)));
525  }
526
527  if (csd_whitelist_store_.get()) {
528    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
529    csd_whitelist_store_->Init(
530        csd_whitelist_filename_,
531        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
532                   base::Unretained(this)));
533
534    std::vector<SBAddFullHash> full_hashes;
535    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
536      LoadWhitelist(full_hashes, &csd_whitelist_);
537    } else {
538      WhitelistEverything(&csd_whitelist_);
539    }
540  } else {
541    WhitelistEverything(&csd_whitelist_);  // Just to be safe.
542  }
543
544  if (download_whitelist_store_.get()) {
545    download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
546    download_whitelist_store_->Init(
547        download_whitelist_filename_,
548        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
549                   base::Unretained(this)));
550
551    std::vector<SBAddFullHash> full_hashes;
552    if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
553      LoadWhitelist(full_hashes, &download_whitelist_);
554    } else {
555      WhitelistEverything(&download_whitelist_);
556    }
557  } else {
558    WhitelistEverything(&download_whitelist_);  // Just to be safe.
559  }
560
561  if (extension_blacklist_store_.get()) {
562    extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
563    extension_blacklist_store_->Init(
564        extension_blacklist_filename_,
565        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
566                   base::Unretained(this)));
567  }
568
569  if (side_effect_free_whitelist_store_.get()) {
570    side_effect_free_whitelist_filename_ =
571        SideEffectFreeWhitelistDBFilename(filename_base);
572    side_effect_free_whitelist_prefix_set_filename_ =
573        PrefixSetForFilename(side_effect_free_whitelist_filename_);
574    side_effect_free_whitelist_store_->Init(
575        side_effect_free_whitelist_filename_,
576        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
577                   base::Unretained(this)));
578
579    // If there is no database, the filter cannot be used.
580    base::File::Info db_info;
581    if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
582        && db_info.size != 0) {
583      const base::TimeTicks before = base::TimeTicks::Now();
584      side_effect_free_whitelist_prefix_set_ =
585          safe_browsing::PrefixSet::LoadFile(
586              side_effect_free_whitelist_prefix_set_filename_);
587      UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
588                          base::TimeTicks::Now() - before);
589      if (!side_effect_free_whitelist_prefix_set_.get())
590        RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
591    }
592  } else {
593    // Delete any files of the side-effect free sidelist that may be around
594    // from when it was previously enabled.
595    SafeBrowsingStoreFile::DeleteStore(
596        SideEffectFreeWhitelistDBFilename(filename_base));
597  }
598
599  if (ip_blacklist_store_.get()) {
600    ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base);
601    ip_blacklist_store_->Init(
602        ip_blacklist_filename_,
603        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
604                   base::Unretained(this)));
605
606    std::vector<SBAddFullHash> full_hashes;
607    if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
608      LoadIpBlacklist(full_hashes);
609    } else {
610      LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
611    }
612  }
613}
614
615bool SafeBrowsingDatabaseNew::ResetDatabase() {
616  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
617
618  // Delete files on disk.
619  // TODO(shess): Hard to see where one might want to delete without a
620  // reset.  Perhaps inline |Delete()|?
621  if (!Delete())
622    return false;
623
624  // Reset objects in memory.
625  {
626    base::AutoLock locked(lookup_lock_);
627    cached_browse_hashes_.clear();
628    prefix_miss_cache_.clear();
629    browse_prefix_set_.reset();
630    side_effect_free_whitelist_prefix_set_.reset();
631    ip_blacklist_.clear();
632  }
633  // Wants to acquire the lock itself.
634  WhitelistEverything(&csd_whitelist_);
635  WhitelistEverything(&download_whitelist_);
636  return true;
637}
638
639bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
640    const GURL& url,
641    std::vector<SBPrefix>* prefix_hits,
642    std::vector<SBFullHashResult>* cache_hits) {
643  // Clear the results first.
644  prefix_hits->clear();
645  cache_hits->clear();
646
647  std::vector<SBFullHash> full_hashes;
648  BrowseFullHashesToCheck(url, false, &full_hashes);
649  if (full_hashes.empty())
650    return false;
651
652  // This function is called on the I/O thread, prevent changes to
653  // filter and caches.
654  base::AutoLock locked(lookup_lock_);
655
656  // |browse_prefix_set_| is empty until it is either read from disk, or the
657  // first update populates it.  Bail out without a hit if not yet
658  // available.
659  if (!browse_prefix_set_.get())
660    return false;
661
662  size_t miss_count = 0;
663  for (size_t i = 0; i < full_hashes.size(); ++i) {
664    if (browse_prefix_set_->Exists(full_hashes[i])) {
665      const SBPrefix prefix = full_hashes[i].prefix;
666      prefix_hits->push_back(prefix);
667      if (prefix_miss_cache_.count(prefix) > 0)
668        ++miss_count;
669    }
670  }
671
672  // If all the prefixes are cached as 'misses', don't issue a GetHash.
673  if (miss_count == prefix_hits->size())
674    return false;
675
676  // Find matching cached gethash responses.
677  std::sort(prefix_hits->begin(), prefix_hits->end());
678  GetCachedFullHashesForBrowse(*prefix_hits, cached_browse_hashes_, cache_hits);
679
680  return true;
681}
682
683bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
684    const std::vector<GURL>& urls,
685    std::vector<SBPrefix>* prefix_hits) {
686  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
687
688  // Ignore this check when download checking is not enabled.
689  if (!download_store_.get())
690    return false;
691
692  std::vector<SBPrefix> prefixes;
693  GetDownloadUrlPrefixes(urls, &prefixes);
694  return MatchAddPrefixes(download_store_.get(),
695                          safe_browsing_util::BINURL % 2,
696                          prefixes,
697                          prefix_hits);
698}
699
700bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
701  // This method is theoretically thread-safe but we expect all calls to
702  // originate from the IO thread.
703  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
704  std::vector<SBFullHash> full_hashes;
705  BrowseFullHashesToCheck(url, true, &full_hashes);
706  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
707}
708
709bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
710  std::vector<SBFullHash> full_hashes;
711  BrowseFullHashesToCheck(url, true, &full_hashes);
712  return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
713}
714
715bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
716    const std::vector<SBPrefix>& prefixes,
717    std::vector<SBPrefix>* prefix_hits) {
718  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
719  if (!extension_blacklist_store_)
720    return false;
721
722  return MatchAddPrefixes(extension_blacklist_store_.get(),
723                          safe_browsing_util::EXTENSIONBLACKLIST % 2,
724                          prefixes,
725                          prefix_hits);
726}
727
728bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
729    const GURL& url) {
730  std::string host;
731  std::string path;
732  std::string query;
733  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
734  std::string url_to_check = host + path;
735  if (!query.empty())
736    url_to_check +=  "?" + query;
737  SBFullHash full_hash = SBFullHashForString(url_to_check);
738
739  // This function can be called on any thread, so lock against any changes
740  base::AutoLock locked(lookup_lock_);
741
742  // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
743  // from disk, or the first update populates it.  Bail out without a hit if
744  // not yet available.
745  if (!side_effect_free_whitelist_prefix_set_.get())
746    return false;
747
748  return side_effect_free_whitelist_prefix_set_->Exists(full_hash);
749}
750
751bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
752  net::IPAddressNumber ip_number;
753  if (!net::ParseIPLiteralToNumber(ip_address, &ip_number))
754    return false;
755  if (ip_number.size() == net::kIPv4AddressSize)
756    ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
757  if (ip_number.size() != net::kIPv6AddressSize)
758    return false;  // better safe than sorry.
759
760  // This function can be called from any thread.
761  base::AutoLock locked(lookup_lock_);
762  for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
763       it != ip_blacklist_.end();
764       ++it) {
765    const std::string& mask = it->first;
766    DCHECK_EQ(mask.size(), ip_number.size());
767    std::string subnet(net::kIPv6AddressSize, '\0');
768    for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
769      subnet[i] = ip_number[i] & mask[i];
770    }
771    const std::string hash = base::SHA1HashString(subnet);
772    DVLOG(2) << "Lookup Malware IP: "
773             << " ip:" << ip_address
774             << " mask:" << base::HexEncode(mask.data(), mask.size())
775             << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
776             << " hash:" << base::HexEncode(hash.data(), hash.size());
777    if (it->second.count(hash) > 0) {
778      return true;
779    }
780  }
781  return false;
782}
783
784bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
785    const std::string& str) {
786  std::vector<SBFullHash> hashes;
787  hashes.push_back(SBFullHashForString(str));
788  return ContainsWhitelistedHashes(download_whitelist_, hashes);
789}
790
791bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
792    const SBWhitelist& whitelist,
793    const std::vector<SBFullHash>& hashes) {
794  base::AutoLock l(lookup_lock_);
795  if (whitelist.second)
796    return true;
797  for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
798       it != hashes.end(); ++it) {
799    if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
800                           *it, SBFullHashLess)) {
801      return true;
802    }
803  }
804  return false;
805}
806
807// Helper to insert add-chunk entries.
808void SafeBrowsingDatabaseNew::InsertAddChunk(
809    SafeBrowsingStore* store,
810    const safe_browsing_util::ListType list_id,
811    const SBChunkData& chunk_data) {
812  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
813  DCHECK(store);
814
815  // The server can give us a chunk that we already have because
816  // it's part of a range.  Don't add it again.
817  const int chunk_id = chunk_data.ChunkNumber();
818  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
819  if (store->CheckAddChunk(encoded_chunk_id))
820    return;
821
822  store->SetAddChunk(encoded_chunk_id);
823  if (chunk_data.IsPrefix()) {
824    const size_t c = chunk_data.PrefixCount();
825    for (size_t i = 0; i < c; ++i) {
826      STATS_COUNTER("SB.PrefixAdd", 1);
827      store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i));
828    }
829  } else {
830    const size_t c = chunk_data.FullHashCount();
831    for (size_t i = 0; i < c; ++i) {
832      STATS_COUNTER("SB.PrefixAddFull", 1);
833      store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i));
834    }
835  }
836}
837
838// Helper to insert sub-chunk entries.
839void SafeBrowsingDatabaseNew::InsertSubChunk(
840    SafeBrowsingStore* store,
841    const safe_browsing_util::ListType list_id,
842    const SBChunkData& chunk_data) {
843  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
844  DCHECK(store);
845
846  // The server can give us a chunk that we already have because
847  // it's part of a range.  Don't add it again.
848  const int chunk_id = chunk_data.ChunkNumber();
849  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
850  if (store->CheckSubChunk(encoded_chunk_id))
851    return;
852
853  store->SetSubChunk(encoded_chunk_id);
854  if (chunk_data.IsPrefix()) {
855    const size_t c = chunk_data.PrefixCount();
856    for (size_t i = 0; i < c; ++i) {
857      STATS_COUNTER("SB.PrefixSub", 1);
858      const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
859      const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
860      store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id,
861                            chunk_data.PrefixAt(i));
862    }
863  } else {
864    const size_t c = chunk_data.FullHashCount();
865    for (size_t i = 0; i < c; ++i) {
866      STATS_COUNTER("SB.PrefixSubFull", 1);
867      const int add_chunk_id = chunk_data.AddChunkNumberAt(i);
868      const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id);
869      store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id,
870                          chunk_data.FullHashAt(i));
871    }
872  }
873}
874
875void SafeBrowsingDatabaseNew::InsertChunks(
876    const std::string& list_name,
877    const std::vector<SBChunkData*>& chunks) {
878  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
879
880  if (corruption_detected_ || chunks.empty())
881    return;
882
883  const base::TimeTicks before = base::TimeTicks::Now();
884
885  // TODO(shess): The caller should just pass list_id.
886  const safe_browsing_util::ListType list_id =
887      safe_browsing_util::GetListId(list_name);
888
889  SafeBrowsingStore* store = GetStore(list_id);
890  if (!store) return;
891
892  change_detected_ = true;
893
894  // TODO(shess): I believe that the list is always add or sub.  Can this use
895  // that productively?
896  store->BeginChunk();
897  for (size_t i = 0; i < chunks.size(); ++i) {
898    if (chunks[i]->IsAdd()) {
899      InsertAddChunk(store, list_id, *chunks[i]);
900    } else if (chunks[i]->IsSub()) {
901      InsertSubChunk(store, list_id, *chunks[i]);
902    } else {
903      NOTREACHED();
904    }
905  }
906  store->FinishChunk();
907
908  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
909}
910
911void SafeBrowsingDatabaseNew::DeleteChunks(
912    const std::vector<SBChunkDelete>& chunk_deletes) {
913  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
914
915  if (corruption_detected_ || chunk_deletes.empty())
916    return;
917
918  const std::string& list_name = chunk_deletes.front().list_name;
919  const safe_browsing_util::ListType list_id =
920      safe_browsing_util::GetListId(list_name);
921
922  SafeBrowsingStore* store = GetStore(list_id);
923  if (!store) return;
924
925  change_detected_ = true;
926
927  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
928    std::vector<int> chunk_numbers;
929    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
930    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
931      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
932      if (chunk_deletes[i].is_sub_del)
933        store->DeleteSubChunk(encoded_chunk_id);
934      else
935        store->DeleteAddChunk(encoded_chunk_id);
936    }
937  }
938}
939
940void SafeBrowsingDatabaseNew::CacheHashResults(
941    const std::vector<SBPrefix>& prefixes,
942    const std::vector<SBFullHashResult>& full_hits,
943    const base::TimeDelta& cache_lifetime) {
944  const base::Time expire_after = base::Time::Now() + cache_lifetime;
945
946  // This is called on the I/O thread, lock against updates.
947  base::AutoLock locked(lookup_lock_);
948
949  if (full_hits.empty()) {
950    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
951    return;
952  }
953
954  const size_t orig_size = cached_browse_hashes_.size();
955  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
956       iter != full_hits.end(); ++iter) {
957    if (iter->list_id == safe_browsing_util::MALWARE ||
958        iter->list_id == safe_browsing_util::PHISH) {
959      SBFullHashCached cached_hash;
960      cached_hash.hash = iter->hash;
961      cached_hash.list_id = iter->list_id;
962      cached_hash.expire_after = expire_after;
963      cached_browse_hashes_.push_back(cached_hash);
964    }
965  }
966
967  // Sort new entries then merge with the previously-sorted entries.
968  std::vector<SBFullHashCached>::iterator
969      orig_end = cached_browse_hashes_.begin() + orig_size;
970  std::sort(orig_end, cached_browse_hashes_.end(), SBFullHashCachedPrefixLess);
971  std::inplace_merge(cached_browse_hashes_.begin(),
972                     orig_end, cached_browse_hashes_.end(),
973                     SBFullHashCachedPrefixLess);
974}
975
976bool SafeBrowsingDatabaseNew::UpdateStarted(
977    std::vector<SBListChunkRanges>* lists) {
978  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
979  DCHECK(lists);
980
981  // If |BeginUpdate()| fails, reset the database.
982  if (!browse_store_->BeginUpdate()) {
983    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
984    HandleCorruptDatabase();
985    return false;
986  }
987
988  if (download_store_.get() && !download_store_->BeginUpdate()) {
989    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
990    HandleCorruptDatabase();
991    return false;
992  }
993
994  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
995    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
996    HandleCorruptDatabase();
997    return false;
998  }
999
1000  if (download_whitelist_store_.get() &&
1001      !download_whitelist_store_->BeginUpdate()) {
1002    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1003    HandleCorruptDatabase();
1004    return false;
1005  }
1006
1007  if (extension_blacklist_store_ &&
1008      !extension_blacklist_store_->BeginUpdate()) {
1009    RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1010    HandleCorruptDatabase();
1011    return false;
1012  }
1013
1014  if (side_effect_free_whitelist_store_ &&
1015      !side_effect_free_whitelist_store_->BeginUpdate()) {
1016    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1017    HandleCorruptDatabase();
1018    return false;
1019  }
1020
1021  if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1022    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1023    HandleCorruptDatabase();
1024    return false;
1025  }
1026
1027  UpdateChunkRangesForLists(browse_store_.get(),
1028                            safe_browsing_util::kMalwareList,
1029                            safe_browsing_util::kPhishingList,
1030                            lists);
1031
1032  // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1033  // deprecated.  Code to delete the list from the store shows ~15k hits/day as
1034  // of Feb 2014, so it has been removed.  Everything _should_ be resilient to
1035  // extra data of that sort.
1036  UpdateChunkRangesForList(download_store_.get(),
1037                           safe_browsing_util::kBinUrlList, lists);
1038
1039  UpdateChunkRangesForList(csd_whitelist_store_.get(),
1040                           safe_browsing_util::kCsdWhiteList, lists);
1041
1042  UpdateChunkRangesForList(download_whitelist_store_.get(),
1043                           safe_browsing_util::kDownloadWhiteList, lists);
1044
1045  UpdateChunkRangesForList(extension_blacklist_store_.get(),
1046                           safe_browsing_util::kExtensionBlacklist, lists);
1047
1048  UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1049                           safe_browsing_util::kSideEffectFreeWhitelist, lists);
1050
1051  UpdateChunkRangesForList(ip_blacklist_store_.get(),
1052                           safe_browsing_util::kIPBlacklist, lists);
1053
1054  corruption_detected_ = false;
1055  change_detected_ = false;
1056  return true;
1057}
1058
1059void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1060  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1061
1062  // The update may have failed due to corrupt storage (for instance,
1063  // an excessive number of invalid add_chunks and sub_chunks).
1064  // Double-check that the databases are valid.
1065  // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1066  // sections would allow throwing a corruption error in
1067  // UpdateStarted().
1068  if (!update_succeeded) {
1069    if (!browse_store_->CheckValidity())
1070      DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1071
1072    if (download_store_.get() && !download_store_->CheckValidity())
1073      DLOG(ERROR) << "Safe-browsing download database corrupt.";
1074
1075    if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1076      DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1077
1078    if (download_whitelist_store_.get() &&
1079        !download_whitelist_store_->CheckValidity()) {
1080      DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1081    }
1082
1083    if (extension_blacklist_store_ &&
1084        !extension_blacklist_store_->CheckValidity()) {
1085      DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1086    }
1087
1088    if (side_effect_free_whitelist_store_ &&
1089        !side_effect_free_whitelist_store_->CheckValidity()) {
1090      DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1091                  << "corrupt.";
1092    }
1093
1094    if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1095      DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1096    }
1097  }
1098
1099  if (corruption_detected_)
1100    return;
1101
1102  // Unroll the transaction if there was a protocol error or if the
1103  // transaction was empty.  This will leave the prefix set, the
1104  // pending hashes, and the prefix miss cache in place.
1105  if (!update_succeeded || !change_detected_) {
1106    // Track empty updates to answer questions at http://crbug.com/72216 .
1107    if (update_succeeded && !change_detected_)
1108      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1109    browse_store_->CancelUpdate();
1110    if (download_store_.get())
1111      download_store_->CancelUpdate();
1112    if (csd_whitelist_store_.get())
1113      csd_whitelist_store_->CancelUpdate();
1114    if (download_whitelist_store_.get())
1115      download_whitelist_store_->CancelUpdate();
1116    if (extension_blacklist_store_)
1117      extension_blacklist_store_->CancelUpdate();
1118    if (side_effect_free_whitelist_store_)
1119      side_effect_free_whitelist_store_->CancelUpdate();
1120    if (ip_blacklist_store_)
1121      ip_blacklist_store_->CancelUpdate();
1122    return;
1123  }
1124
1125  if (download_store_) {
1126    int64 size_bytes = UpdateHashPrefixStore(
1127        download_filename_,
1128        download_store_.get(),
1129        FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1130    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1131                         static_cast<int>(size_bytes / 1024));
1132  }
1133
1134  UpdateBrowseStore();
1135  UpdateWhitelistStore(csd_whitelist_filename_,
1136                       csd_whitelist_store_.get(),
1137                       &csd_whitelist_);
1138  UpdateWhitelistStore(download_whitelist_filename_,
1139                       download_whitelist_store_.get(),
1140                       &download_whitelist_);
1141
1142  if (extension_blacklist_store_) {
1143    int64 size_bytes = UpdateHashPrefixStore(
1144        extension_blacklist_filename_,
1145        extension_blacklist_store_.get(),
1146        FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1147    UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1148                         static_cast<int>(size_bytes / 1024));
1149  }
1150
1151  if (side_effect_free_whitelist_store_)
1152    UpdateSideEffectFreeWhitelistStore();
1153
1154  if (ip_blacklist_store_)
1155    UpdateIpBlacklistStore();
1156}
1157
1158void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1159    const base::FilePath& store_filename,
1160    SafeBrowsingStore* store,
1161    SBWhitelist* whitelist) {
1162  if (!store)
1163    return;
1164
1165  // Note: |builder| will not be empty.  The current data store implementation
1166  // stores all full-length hashes as both full and prefix hashes.
1167  safe_browsing::PrefixSetBuilder builder;
1168  std::vector<SBAddFullHash> full_hashes;
1169  if (!store->FinishUpdate(&builder, &full_hashes)) {
1170    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1171    WhitelistEverything(whitelist);
1172    return;
1173  }
1174
1175#if defined(OS_MACOSX)
1176  base::mac::SetFileBackupExclusion(store_filename);
1177#endif
1178
1179  LoadWhitelist(full_hashes, whitelist);
1180}
1181
1182int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1183    const base::FilePath& store_filename,
1184    SafeBrowsingStore* store,
1185    FailureType failure_type) {
1186  // These results are not used after this call. Simply ignore the
1187  // returned value after FinishUpdate(...).
1188  safe_browsing::PrefixSetBuilder builder;
1189  std::vector<SBAddFullHash> add_full_hashes_result;
1190
1191  if (!store->FinishUpdate(&builder, &add_full_hashes_result))
1192    RecordFailure(failure_type);
1193
1194#if defined(OS_MACOSX)
1195  base::mac::SetFileBackupExclusion(store_filename);
1196#endif
1197
1198  return GetFileSizeOrZero(store_filename);
1199}
1200
1201void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1202  // Measure the amount of IO during the filter build.
1203  base::IoCounters io_before, io_after;
1204  base::ProcessHandle handle = base::Process::Current().handle();
1205  scoped_ptr<base::ProcessMetrics> metric(
1206#if !defined(OS_MACOSX)
1207      base::ProcessMetrics::CreateProcessMetrics(handle)
1208#else
1209      // Getting stats only for the current process is enough, so NULL is fine.
1210      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1211#endif
1212  );
1213
1214  // IoCounters are currently not supported on Mac, and may not be
1215  // available for Linux, so we check the result and only show IO
1216  // stats if they are available.
1217  const bool got_counters = metric->GetIOCounters(&io_before);
1218
1219  const base::TimeTicks before = base::TimeTicks::Now();
1220
1221  // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the
1222  // fly?  Other clients use the SBAddFullHash vector, but AFAICT they only use
1223  // the SBFullHash portion.  It would need an accessor on PrefixSet.
1224  safe_browsing::PrefixSetBuilder builder;
1225  std::vector<SBAddFullHash> add_full_hashes;
1226  if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1227    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1228    return;
1229  }
1230
1231  std::vector<SBFullHash> full_hash_results;
1232  for (size_t i = 0; i < add_full_hashes.size(); ++i) {
1233    full_hash_results.push_back(add_full_hashes[i].full_hash);
1234  }
1235
1236  scoped_ptr<safe_browsing::PrefixSet>
1237      prefix_set(builder.GetPrefixSet(full_hash_results));
1238
1239  // Swap in the newly built filter and cache.
1240  {
1241    base::AutoLock locked(lookup_lock_);
1242
1243    // TODO(shess): If |CacheHashResults()| is posted between the
1244    // earlier lock and this clear, those pending hashes will be lost.
1245    // It could be fixed by only removing hashes which were collected
1246    // at the earlier point.  I believe that is fail-safe as-is (the
1247    // hash will be fetched again).
1248    cached_browse_hashes_.clear();
1249    prefix_miss_cache_.clear();
1250    browse_prefix_set_.swap(prefix_set);
1251  }
1252
1253  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1254
1255  // Persist the prefix set to disk.  Since only this thread changes
1256  // |browse_prefix_set_|, there is no need to lock.
1257  WritePrefixSet();
1258
1259  // Gather statistics.
1260  if (got_counters && metric->GetIOCounters(&io_after)) {
1261    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1262                         static_cast<int>(io_after.ReadTransferCount -
1263                                          io_before.ReadTransferCount) / 1024);
1264    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1265                         static_cast<int>(io_after.WriteTransferCount -
1266                                          io_before.WriteTransferCount) / 1024);
1267    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1268                         static_cast<int>(io_after.ReadOperationCount -
1269                                          io_before.ReadOperationCount));
1270    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1271                         static_cast<int>(io_after.WriteOperationCount -
1272                                          io_before.WriteOperationCount));
1273  }
1274
1275  int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1276  UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1277                       static_cast<int>(file_size / 1024));
1278  file_size = GetFileSizeOrZero(browse_filename_);
1279  UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1280                       static_cast<int>(file_size / 1024));
1281
1282#if defined(OS_MACOSX)
1283  base::mac::SetFileBackupExclusion(browse_filename_);
1284#endif
1285}
1286
1287void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1288  safe_browsing::PrefixSetBuilder builder;
1289  std::vector<SBAddFullHash> add_full_hashes_result;
1290
1291  if (!side_effect_free_whitelist_store_->FinishUpdate(
1292          &builder, &add_full_hashes_result)) {
1293    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1294    return;
1295  }
1296  scoped_ptr<safe_browsing::PrefixSet>
1297      prefix_set(builder.GetPrefixSetNoHashes());
1298
1299  // Swap in the newly built prefix set.
1300  {
1301    base::AutoLock locked(lookup_lock_);
1302    side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1303  }
1304
1305  const base::TimeTicks before = base::TimeTicks::Now();
1306  const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1307      side_effect_free_whitelist_prefix_set_filename_);
1308  UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1309                      base::TimeTicks::Now() - before);
1310
1311  if (!write_ok)
1312    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1313
1314  // Gather statistics.
1315  int64 file_size = GetFileSizeOrZero(
1316      side_effect_free_whitelist_prefix_set_filename_);
1317  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1318                       static_cast<int>(file_size / 1024));
1319  file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1320  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1321                       static_cast<int>(file_size / 1024));
1322
1323#if defined(OS_MACOSX)
1324  base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1325  base::mac::SetFileBackupExclusion(
1326      side_effect_free_whitelist_prefix_set_filename_);
1327#endif
1328}
1329
1330void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1331  // Note: prefixes will not be empty.  The current data store implementation
1332  // stores all full-length hashes as both full and prefix hashes.
1333  safe_browsing::PrefixSetBuilder builder;
1334  std::vector<SBAddFullHash> full_hashes;
1335  if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1336    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1337    LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
1338    return;
1339  }
1340
1341#if defined(OS_MACOSX)
1342  base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1343#endif
1344
1345  LoadIpBlacklist(full_hashes);
1346}
1347
1348void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1349  // Reset the database after the current task has unwound (but only
1350  // reset once within the scope of a given task).
1351  if (!reset_factory_.HasWeakPtrs()) {
1352    RecordFailure(FAILURE_DATABASE_CORRUPT);
1353    base::MessageLoop::current()->PostTask(FROM_HERE,
1354        base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1355                   reset_factory_.GetWeakPtr()));
1356  }
1357}
1358
1359void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1360  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1361  corruption_detected_ = true;  // Stop updating the database.
1362  ResetDatabase();
1363
1364  // NOTE(shess): ResetDatabase() should remove the corruption, so this should
1365  // only happen once.  If you are here because you are hitting this after a
1366  // restart, then I would be very interested in working with you to figure out
1367  // what is happening, since it may affect real users.
1368  DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1369}
1370
1371// TODO(shess): I'm not clear why this code doesn't have any
1372// real error-handling.
1373void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1374  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1375  DCHECK(!browse_prefix_set_filename_.empty());
1376
1377  // If there is no database, the filter cannot be used.
1378  base::File::Info db_info;
1379  if (!base::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1380    return;
1381
1382  // Cleanup any stale bloom filter (no longer used).
1383  // TODO(shess): Track failure to delete?
1384  base::FilePath bloom_filter_filename =
1385      BloomFilterForFilename(browse_filename_);
1386  base::DeleteFile(bloom_filter_filename, false);
1387
1388  const base::TimeTicks before = base::TimeTicks::Now();
1389  browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile(
1390      browse_prefix_set_filename_);
1391  UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1392
1393  if (!browse_prefix_set_.get())
1394    RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1395}
1396
1397bool SafeBrowsingDatabaseNew::Delete() {
1398  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1399
1400  const bool r1 = browse_store_->Delete();
1401  if (!r1)
1402    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1403
1404  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1405  if (!r2)
1406    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1407
1408  const bool r3 = csd_whitelist_store_.get() ?
1409      csd_whitelist_store_->Delete() : true;
1410  if (!r3)
1411    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1412
1413  const bool r4 = download_whitelist_store_.get() ?
1414      download_whitelist_store_->Delete() : true;
1415  if (!r4)
1416    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1417
1418  base::FilePath bloom_filter_filename =
1419      BloomFilterForFilename(browse_filename_);
1420  const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1421  if (!r5)
1422    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1423
1424  const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1425  if (!r6)
1426    RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1427
1428  const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1429  if (!r7)
1430    RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1431
1432  const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1433                                    false);
1434  if (!r8)
1435    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1436
1437  const bool r9 = base::DeleteFile(
1438      side_effect_free_whitelist_prefix_set_filename_,
1439      false);
1440  if (!r9)
1441    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1442
1443  const bool r10 = base::DeleteFile(ip_blacklist_filename_, false);
1444  if (!r10)
1445    RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1446
1447  return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
1448}
1449
1450void SafeBrowsingDatabaseNew::WritePrefixSet() {
1451  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1452
1453  if (!browse_prefix_set_.get())
1454    return;
1455
1456  const base::TimeTicks before = base::TimeTicks::Now();
1457  const bool write_ok = browse_prefix_set_->WriteFile(
1458      browse_prefix_set_filename_);
1459  UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1460
1461  if (!write_ok)
1462    RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1463
1464#if defined(OS_MACOSX)
1465  base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
1466#endif
1467}
1468
1469void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1470  base::AutoLock locked(lookup_lock_);
1471  whitelist->second = true;
1472  whitelist->first.clear();
1473}
1474
1475void SafeBrowsingDatabaseNew::LoadWhitelist(
1476    const std::vector<SBAddFullHash>& full_hashes,
1477    SBWhitelist* whitelist) {
1478  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1479  if (full_hashes.size() > kMaxWhitelistSize) {
1480    WhitelistEverything(whitelist);
1481    return;
1482  }
1483
1484  std::vector<SBFullHash> new_whitelist;
1485  new_whitelist.reserve(full_hashes.size());
1486  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1487       it != full_hashes.end(); ++it) {
1488    new_whitelist.push_back(it->full_hash);
1489  }
1490  std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1491
1492  SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1493  if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1494                         kill_switch, SBFullHashLess)) {
1495    // The kill switch is whitelisted hence we whitelist all URLs.
1496    WhitelistEverything(whitelist);
1497  } else {
1498    base::AutoLock locked(lookup_lock_);
1499    whitelist->second = false;
1500    whitelist->first.swap(new_whitelist);
1501  }
1502}
1503
1504void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1505    const std::vector<SBAddFullHash>& full_hashes) {
1506  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1507  IPBlacklist new_blacklist;
1508  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1509       it != full_hashes.end();
1510       ++it) {
1511    const char* full_hash = it->full_hash.full_hash;
1512    DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1513    // The format of the IP blacklist is:
1514    // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1515    std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1516    size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1517    if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1518      RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1519      new_blacklist.clear();  // Load empty blacklist.
1520      break;
1521    }
1522
1523    // We precompute the mask for the given subnet size to speed up lookups.
1524    // Basically we need to create a 16B long string which has the highest
1525    // |size| bits sets to one.
1526    std::string mask(net::kIPv6AddressSize, '\0');
1527    mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1528    if ((prefix_size % 8) != 0) {
1529      mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1530    }
1531    DVLOG(2) << "Inserting malicious IP: "
1532             << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1533             << " mask:" << base::HexEncode(mask.data(), mask.size())
1534             << " prefix_size:" << prefix_size
1535             << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1536                                                 hashed_ip_prefix.size());
1537    new_blacklist[mask].insert(hashed_ip_prefix);
1538  }
1539
1540  base::AutoLock locked(lookup_lock_);
1541  ip_blacklist_.swap(new_blacklist);
1542}
1543
1544bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1545  SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1546  std::vector<SBFullHash> full_hashes;
1547  full_hashes.push_back(malware_kill_switch);
1548  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1549}
1550
1551bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() {
1552  return csd_whitelist_.second;
1553}
1554