safe_browsing_database.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/safe_browsing_database.h"
6
7#include <algorithm>
8#include <iterator>
9
10#include "base/bind.h"
11#include "base/file_util.h"
12#include "base/message_loop/message_loop.h"
13#include "base/metrics/histogram.h"
14#include "base/metrics/stats_counters.h"
15#include "base/process/process.h"
16#include "base/process/process_metrics.h"
17#include "base/sha1.h"
18#include "base/strings/string_number_conversions.h"
19#include "base/strings/stringprintf.h"
20#include "base/time/time.h"
21#include "chrome/browser/safe_browsing/prefix_set.h"
22#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
23#include "content/public/browser/browser_thread.h"
24#include "crypto/sha2.h"
25#include "net/base/net_util.h"
26#include "url/gurl.h"
27
28#if defined(OS_MACOSX)
29#include "base/mac/mac_util.h"
30#endif
31
32using content::BrowserThread;
33
34namespace {
35
36// Filename suffix for the bloom filter.
37const base::FilePath::CharType kBloomFilterFile[] =
38    FILE_PATH_LITERAL(" Filter 2");
39// Filename suffix for the prefix set.
40const base::FilePath::CharType kPrefixSetFile[] =
41    FILE_PATH_LITERAL(" Prefix Set");
42// Filename suffix for download store.
43const base::FilePath::CharType kDownloadDBFile[] =
44    FILE_PATH_LITERAL(" Download");
45// Filename suffix for client-side phishing detection whitelist store.
46const base::FilePath::CharType kCsdWhitelistDBFile[] =
47    FILE_PATH_LITERAL(" Csd Whitelist");
48// Filename suffix for the download whitelist store.
49const base::FilePath::CharType kDownloadWhitelistDBFile[] =
50    FILE_PATH_LITERAL(" Download Whitelist");
51// Filename suffix for the extension blacklist store.
52const base::FilePath::CharType kExtensionBlacklistDBFile[] =
53    FILE_PATH_LITERAL(" Extension Blacklist");
54// Filename suffix for the side-effect free whitelist store.
55const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
56    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
57// Filename suffix for the csd malware IP blacklist store.
58const base::FilePath::CharType kIPBlacklistDBFile[] =
59    FILE_PATH_LITERAL(" IP Blacklist");
60
61// Filename suffix for browse store.
62// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63// Unfortunately, to change the name implies lots of transition code
64// for little benefit.  If/when file formats change (say to put all
65// the data in one file), that would be a convenient point to rectify
66// this.
67const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
68
// Cached entries older than this many minutes are considered stale.
const int kMaxStalenessMinutes = 45;

// Upper bound on the number of entries allowed in any whitelist.  When a
// whitelist on disk holds more entries than this, every lookup against
// that whitelist is treated as a match.
const size_t kMaxWhitelistSize = 5000;

// Kill switch: when the hash of this exact expression is on a whitelist,
// every lookup against that whitelist is treated as a match.
// Don't change this!
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";

// Kill switch: when the hash of this exact expression is on a whitelist,
// the malware IP blacklisting feature is disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// NOTE(review): presumably the inclusive bounds, in bits, on IP prefix
// sizes accepted for the IP blacklist; the use site is not in this part of
// the file -- confirm.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;
90
91// To save space, the incoming |chunk_id| and |list_id| are combined
92// into an |encoded_chunk_id| for storage by shifting the |list_id|
93// into the low-order bits.  These functions decode that information.
94// TODO(lzheng): It was reasonable when database is saved in sqlite, but
95// there should be better ways to save chunk_id and list_id after we use
96// SafeBrowsingStoreFile.
int GetListIdBit(const int encoded_chunk_id) {
  // The list id occupies the lowest bit of the encoded value.
  const int list_bit = encoded_chunk_id & 0x1;
  return list_bit;
}
int DecodeChunkId(int encoded_chunk_id) {
  // Shift the list bit out; what remains is the chunk id.
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
103int EncodeChunkId(const int chunk, const int list_id) {
104  DCHECK_NE(list_id, safe_browsing_util::INVALID);
105  return chunk << 1 | list_id % 2;
106}
107
108// Generate the set of full hashes to check for |url|.  If
109// |include_whitelist_hashes| is true we will generate additional path-prefixes
110// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
111// whitelist it should also match /foo/bar which is not the case for all the
112// other lists.  We'll also always add a pattern for the empty path.
113// TODO(shess): This function is almost the same as
114// |CompareFullHashes()| in safe_browsing_util.cc, except that code
115// does an early exit on match.  Since match should be the infrequent
116// case (phishing or malware found), consider combining this function
117// with that one.
118void BrowseFullHashesToCheck(const GURL& url,
119                             bool include_whitelist_hashes,
120                             std::vector<SBFullHash>* full_hashes) {
121  std::vector<std::string> hosts;
122  if (url.HostIsIPAddress()) {
123    hosts.push_back(url.host());
124  } else {
125    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
126  }
127
128  std::vector<std::string> paths;
129  safe_browsing_util::GeneratePathsToCheck(url, &paths);
130
131  for (size_t i = 0; i < hosts.size(); ++i) {
132    for (size_t j = 0; j < paths.size(); ++j) {
133      const std::string& path = paths[j];
134      full_hashes->push_back(SBFullHashForString(hosts[i] + path));
135
136      // We may have /foo as path-prefix in the whitelist which should
137      // also match with /foo/bar and /foo?bar.  Hence, for every path
138      // that ends in '/' we also add the path without the slash.
139      if (include_whitelist_hashes &&
140          path.size() > 1 &&
141          path[path.size() - 1] == '/') {
142        full_hashes->push_back(
143            SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1)));
144      }
145    }
146  }
147}
148
149// Get the prefixes matching the download |urls|.
150void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
151                            std::vector<SBPrefix>* prefixes) {
152  std::vector<SBFullHash> full_hashes;
153  for (size_t i = 0; i < urls.size(); ++i)
154    BrowseFullHashesToCheck(urls[i], false, &full_hashes);
155
156  for (size_t i = 0; i < full_hashes.size(); ++i)
157    prefixes->push_back(full_hashes[i].prefix);
158}
159
160// Helper function to compare addprefixes in |store| with |prefixes|.
161// The |list_bit| indicates which list (url or hash) to compare.
162//
163// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
164// the actual matching prefixes.
165bool MatchAddPrefixes(SafeBrowsingStore* store,
166                      int list_bit,
167                      const std::vector<SBPrefix>& prefixes,
168                      std::vector<SBPrefix>* prefix_hits) {
169  prefix_hits->clear();
170  bool found_match = false;
171
172  SBAddPrefixes add_prefixes;
173  store->GetAddPrefixes(&add_prefixes);
174  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
175       iter != add_prefixes.end(); ++iter) {
176    for (size_t j = 0; j < prefixes.size(); ++j) {
177      const SBPrefix& prefix = prefixes[j];
178      if (prefix == iter->prefix &&
179          GetListIdBit(iter->chunk_id) == list_bit) {
180        prefix_hits->push_back(prefix);
181        found_match = true;
182      }
183    }
184  }
185  return found_match;
186}
187
188// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189// add them to |full_hits| if not expired.  "Not expired" is when
190// either |last_update| was recent enough, or the item has been
191// received recently enough.  Expired items are not deleted because a
192// future update may make them acceptable again.
193//
194// For efficiency reasons the code walks |prefix_hits| and
195// |full_hashes| in parallel, so they must be sorted by prefix.
196void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
197                                  const std::vector<SBAddFullHash>& full_hashes,
198                                  std::vector<SBFullHashResult>* full_hits,
199                                  base::Time last_update) {
200  const base::Time expire_time =
201      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
202
203  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
204  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
205
206  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
207    if (*piter < hiter->full_hash.prefix) {
208      ++piter;
209    } else if (hiter->full_hash.prefix < *piter) {
210      ++hiter;
211    } else {
212      if (expire_time < last_update ||
213          expire_time.ToTimeT() < hiter->received) {
214        SBFullHashResult result;
215        const int list_bit = GetListIdBit(hiter->chunk_id);
216        DCHECK(list_bit == safe_browsing_util::MALWARE ||
217               list_bit == safe_browsing_util::PHISH);
218        const safe_browsing_util::ListType list_id =
219            static_cast<safe_browsing_util::ListType>(list_bit);
220        if (!safe_browsing_util::GetListName(list_id, &result.list_name))
221          continue;
222        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
223        result.hash = hiter->full_hash;
224        full_hits->push_back(result);
225      }
226
227      // Only increment |hiter|, |piter| might have multiple hits.
228      ++hiter;
229    }
230  }
231}
232
233// This function generates a chunk range string for |chunks|. It
234// outputs one chunk range string per list and writes it to the
235// |list_ranges| vector.  We expect |list_ranges| to already be of the
236// right size.  E.g., if |chunks| contains chunks with two different
237// list ids then |list_ranges| must contain two elements.
238void GetChunkRanges(const std::vector<int>& chunks,
239                    std::vector<std::string>* list_ranges) {
240  // Since there are 2 possible list ids, there must be exactly two
241  // list ranges.  Even if the chunk data should only contain one
242  // line, this code has to somehow handle corruption.
243  DCHECK_EQ(2U, list_ranges->size());
244
245  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
246  for (std::vector<int>::const_iterator iter = chunks.begin();
247       iter != chunks.end(); ++iter) {
248    int mod_list_id = GetListIdBit(*iter);
249    DCHECK_GE(mod_list_id, 0);
250    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
251    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
252  }
253  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
254    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
255  }
256}
257
258// Helper function to create chunk range lists for Browse related
259// lists.
260void UpdateChunkRanges(SafeBrowsingStore* store,
261                       const std::vector<std::string>& listnames,
262                       std::vector<SBListChunkRanges>* lists) {
263  if (!store)
264    return;
265
266  DCHECK_GT(listnames.size(), 0U);
267  DCHECK_LE(listnames.size(), 2U);
268  std::vector<int> add_chunks;
269  std::vector<int> sub_chunks;
270  store->GetAddChunks(&add_chunks);
271  store->GetSubChunks(&sub_chunks);
272
273  // Always decode 2 ranges, even if only the first one is expected.
274  // The loop below will only load as many into |lists| as |listnames|
275  // indicates.
276  std::vector<std::string> adds(2);
277  std::vector<std::string> subs(2);
278  GetChunkRanges(add_chunks, &adds);
279  GetChunkRanges(sub_chunks, &subs);
280
281  for (size_t i = 0; i < listnames.size(); ++i) {
282    const std::string& listname = listnames[i];
283    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
284              static_cast<int>(i % 2));
285    DCHECK_NE(safe_browsing_util::GetListId(listname),
286              safe_browsing_util::INVALID);
287    lists->push_back(SBListChunkRanges(listname));
288    lists->back().adds.swap(adds[i]);
289    lists->back().subs.swap(subs[i]);
290  }
291}
292
293void UpdateChunkRangesForLists(SafeBrowsingStore* store,
294                               const std::string& listname0,
295                               const std::string& listname1,
296                               std::vector<SBListChunkRanges>* lists) {
297  std::vector<std::string> listnames;
298  listnames.push_back(listname0);
299  listnames.push_back(listname1);
300  UpdateChunkRanges(store, listnames, lists);
301}
302
303void UpdateChunkRangesForList(SafeBrowsingStore* store,
304                              const std::string& listname,
305                              std::vector<SBListChunkRanges>* lists) {
306  UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
307}
308
309// Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
310// safe_browsing_store.h orders on both chunk-id and prefix.
311bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
312  return a.full_hash.prefix < b.full_hash.prefix;
313}
314
315// This code always checks for non-zero file size.  This helper makes
316// that less verbose.
317int64 GetFileSizeOrZero(const base::FilePath& file_path) {
318  int64 size_64;
319  if (!base::GetFileSize(file_path, &size_64))
320    return 0;
321  return size_64;
322}
323
324// Used to order whitelist storage in memory.
325bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
326  return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
327}
328
329}  // namespace
330
331// The default SafeBrowsingDatabaseFactory.
332class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
333 public:
334  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
335      bool enable_download_protection,
336      bool enable_client_side_whitelist,
337      bool enable_download_whitelist,
338      bool enable_extension_blacklist,
339      bool enable_side_effect_free_whitelist,
340      bool enable_ip_blacklist) OVERRIDE {
341    return new SafeBrowsingDatabaseNew(
342        new SafeBrowsingStoreFile,
343        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
344        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
345        enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
346        enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
347        enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
348        enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
349  }
350
351  SafeBrowsingDatabaseFactoryImpl() { }
352
353 private:
354  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
355};
356
357// static
358SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
359
360// Factory method, non-thread safe. Caller has to make sure this s called
361// on SafeBrowsing Thread.
362// TODO(shess): There's no need for a factory any longer.  Convert
363// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
364// callers just construct things directly.
365SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
366    bool enable_download_protection,
367    bool enable_client_side_whitelist,
368    bool enable_download_whitelist,
369    bool enable_extension_blacklist,
370    bool enable_side_effect_free_whitelist,
371    bool enable_ip_blacklist) {
372  if (!factory_)
373    factory_ = new SafeBrowsingDatabaseFactoryImpl();
374  return factory_->CreateSafeBrowsingDatabase(
375      enable_download_protection,
376      enable_client_side_whitelist,
377      enable_download_whitelist,
378      enable_extension_blacklist,
379      enable_side_effect_free_whitelist,
380      enable_ip_blacklist);
381}
382
383SafeBrowsingDatabase::~SafeBrowsingDatabase() {
384}
385
386// static
387base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
388    const base::FilePath& db_base_filename) {
389  return base::FilePath(db_base_filename.value() + kBrowseDBFile);
390}
391
392// static
393base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
394    const base::FilePath& db_base_filename) {
395  return base::FilePath(db_base_filename.value() + kDownloadDBFile);
396}
397
398// static
399base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
400    const base::FilePath& db_filename) {
401  return base::FilePath(db_filename.value() + kBloomFilterFile);
402}
403
404// static
405base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
406    const base::FilePath& db_filename) {
407  return base::FilePath(db_filename.value() + kPrefixSetFile);
408}
409
410// static
411base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
412    const base::FilePath& db_filename) {
413  return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
414}
415
416// static
417base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
418    const base::FilePath& db_filename) {
419  return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
420}
421
422// static
423base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
424    const base::FilePath& db_filename) {
425  return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
426}
427
428// static
429base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
430    const base::FilePath& db_filename) {
431  return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
432}
433
434// static
435base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
436    const base::FilePath& db_filename) {
437  return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
438}
439
440SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
441  if (list_id == safe_browsing_util::PHISH ||
442      list_id == safe_browsing_util::MALWARE) {
443    return browse_store_.get();
444  } else if (list_id == safe_browsing_util::BINURL) {
445    return download_store_.get();
446  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
447    return csd_whitelist_store_.get();
448  } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
449    return download_whitelist_store_.get();
450  } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
451    return extension_blacklist_store_.get();
452  } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
453    return side_effect_free_whitelist_store_.get();
454  } else if (list_id == safe_browsing_util::IPBLACKLIST) {
455    return ip_blacklist_store_.get();
456  }
457  return NULL;
458}
459
460// static
461void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
462  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
463                            FAILURE_DATABASE_MAX);
464}
465
466SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
467    : creation_loop_(base::MessageLoop::current()),
468      browse_store_(new SafeBrowsingStoreFile),
469      reset_factory_(this),
470      corruption_detected_(false),
471      change_detected_(false) {
472  DCHECK(browse_store_.get());
473  DCHECK(!download_store_.get());
474  DCHECK(!csd_whitelist_store_.get());
475  DCHECK(!download_whitelist_store_.get());
476  DCHECK(!extension_blacklist_store_.get());
477  DCHECK(!side_effect_free_whitelist_store_.get());
478  DCHECK(!ip_blacklist_store_.get());
479}
480
481SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
482    SafeBrowsingStore* browse_store,
483    SafeBrowsingStore* download_store,
484    SafeBrowsingStore* csd_whitelist_store,
485    SafeBrowsingStore* download_whitelist_store,
486    SafeBrowsingStore* extension_blacklist_store,
487    SafeBrowsingStore* side_effect_free_whitelist_store,
488    SafeBrowsingStore* ip_blacklist_store)
489    : creation_loop_(base::MessageLoop::current()),
490      browse_store_(browse_store),
491      download_store_(download_store),
492      csd_whitelist_store_(csd_whitelist_store),
493      download_whitelist_store_(download_whitelist_store),
494      extension_blacklist_store_(extension_blacklist_store),
495      side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
496      ip_blacklist_store_(ip_blacklist_store),
497      reset_factory_(this),
498      corruption_detected_(false) {
499  DCHECK(browse_store_.get());
500}
501
502SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
503  // The DCHECK is disabled due to crbug.com/338486 .
504  // DCHECK_EQ(creation_loop_, base::MessageLoop::current());
505}
506
507void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
508  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
509  // Ensure we haven't been run before.
510  DCHECK(browse_filename_.empty());
511  DCHECK(download_filename_.empty());
512  DCHECK(csd_whitelist_filename_.empty());
513  DCHECK(download_whitelist_filename_.empty());
514  DCHECK(extension_blacklist_filename_.empty());
515  DCHECK(side_effect_free_whitelist_filename_.empty());
516  DCHECK(ip_blacklist_filename_.empty());
517
518  browse_filename_ = BrowseDBFilename(filename_base);
519  browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
520
521  browse_store_->Init(
522      browse_filename_,
523      base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
524                 base::Unretained(this)));
525  DVLOG(1) << "Init browse store: " << browse_filename_.value();
526
527  {
528    // NOTE: There is no need to grab the lock in this function, since
529    // until it returns, there are no pointers to this class on other
530    // threads.  Then again, that means there is no possibility of
531    // contention on the lock...
532    base::AutoLock locked(lookup_lock_);
533    full_browse_hashes_.clear();
534    pending_browse_hashes_.clear();
535    LoadPrefixSet();
536  }
537
538  if (download_store_.get()) {
539    download_filename_ = DownloadDBFilename(filename_base);
540    download_store_->Init(
541        download_filename_,
542        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
543                   base::Unretained(this)));
544    DVLOG(1) << "Init download store: " << download_filename_.value();
545  }
546
547  if (csd_whitelist_store_.get()) {
548    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
549    csd_whitelist_store_->Init(
550        csd_whitelist_filename_,
551        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
552                   base::Unretained(this)));
553    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
554    std::vector<SBAddFullHash> full_hashes;
555    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
556      LoadWhitelist(full_hashes, &csd_whitelist_);
557    } else {
558      WhitelistEverything(&csd_whitelist_);
559    }
560  } else {
561    WhitelistEverything(&csd_whitelist_);  // Just to be safe.
562  }
563
564  if (download_whitelist_store_.get()) {
565    download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
566    download_whitelist_store_->Init(
567        download_whitelist_filename_,
568        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
569                   base::Unretained(this)));
570    DVLOG(1) << "Init download whitelist store: "
571             << download_whitelist_filename_.value();
572    std::vector<SBAddFullHash> full_hashes;
573    if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
574      LoadWhitelist(full_hashes, &download_whitelist_);
575    } else {
576      WhitelistEverything(&download_whitelist_);
577    }
578  } else {
579    WhitelistEverything(&download_whitelist_);  // Just to be safe.
580  }
581
582  if (extension_blacklist_store_.get()) {
583    extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
584    extension_blacklist_store_->Init(
585        extension_blacklist_filename_,
586        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
587                   base::Unretained(this)));
588    DVLOG(1) << "Init extension blacklist store: "
589             << extension_blacklist_filename_.value();
590  }
591
592  if (side_effect_free_whitelist_store_.get()) {
593    side_effect_free_whitelist_filename_ =
594        SideEffectFreeWhitelistDBFilename(filename_base);
595    side_effect_free_whitelist_prefix_set_filename_ =
596        PrefixSetForFilename(side_effect_free_whitelist_filename_);
597    side_effect_free_whitelist_store_->Init(
598        side_effect_free_whitelist_filename_,
599        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
600                   base::Unretained(this)));
601    DVLOG(1) << "Init side-effect free whitelist store: "
602             << side_effect_free_whitelist_filename_.value();
603
604    // If there is no database, the filter cannot be used.
605    base::File::Info db_info;
606    if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
607        && db_info.size != 0) {
608      const base::TimeTicks before = base::TimeTicks::Now();
609      side_effect_free_whitelist_prefix_set_.reset(
610          safe_browsing::PrefixSet::LoadFile(
611              side_effect_free_whitelist_prefix_set_filename_));
612      DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
613               << "prefix set in "
614               << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
615      UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
616                          base::TimeTicks::Now() - before);
617      if (!side_effect_free_whitelist_prefix_set_.get())
618        RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
619    }
620  } else {
621    // Delete any files of the side-effect free sidelist that may be around
622    // from when it was previously enabled.
623    SafeBrowsingStoreFile::DeleteStore(
624        SideEffectFreeWhitelistDBFilename(filename_base));
625  }
626
627  if (ip_blacklist_store_.get()) {
628    ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base);
629    ip_blacklist_store_->Init(
630        ip_blacklist_filename_,
631        base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
632                   base::Unretained(this)));
633    DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
634             << ip_blacklist_filename_.value();
635    std::vector<SBAddFullHash> full_hashes;
636    if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
637      LoadIpBlacklist(full_hashes);
638    } else {
639      DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
640      LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
641    }
642  }
643}
644
645bool SafeBrowsingDatabaseNew::ResetDatabase() {
646  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
647
648  // Delete files on disk.
649  // TODO(shess): Hard to see where one might want to delete without a
650  // reset.  Perhaps inline |Delete()|?
651  if (!Delete())
652    return false;
653
654  // Reset objects in memory.
655  {
656    base::AutoLock locked(lookup_lock_);
657    full_browse_hashes_.clear();
658    pending_browse_hashes_.clear();
659    prefix_miss_cache_.clear();
660    browse_prefix_set_.reset();
661    side_effect_free_whitelist_prefix_set_.reset();
662    ip_blacklist_.clear();
663  }
664  // Wants to acquire the lock itself.
665  WhitelistEverything(&csd_whitelist_);
666  WhitelistEverything(&download_whitelist_);
667  return true;
668}
669
670// TODO(lzheng): Remove matching_list, it is not used anywhere.
671bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
672    const GURL& url,
673    std::string* matching_list,
674    std::vector<SBPrefix>* prefix_hits,
675    std::vector<SBFullHashResult>* full_hits,
676    base::Time last_update) {
677  // Clear the results first.
678  matching_list->clear();
679  prefix_hits->clear();
680  full_hits->clear();
681
682  std::vector<SBFullHash> full_hashes;
683  BrowseFullHashesToCheck(url, false, &full_hashes);
684  if (full_hashes.empty())
685    return false;
686
687  // This function is called on the I/O thread, prevent changes to
688  // filter and caches.
689  base::AutoLock locked(lookup_lock_);
690
691  // |browse_prefix_set_| is empty until it is either read from disk, or the
692  // first update populates it.  Bail out without a hit if not yet
693  // available.
694  if (!browse_prefix_set_.get())
695    return false;
696
697  size_t miss_count = 0;
698  for (size_t i = 0; i < full_hashes.size(); ++i) {
699    const SBPrefix prefix = full_hashes[i].prefix;
700    if (browse_prefix_set_->Exists(prefix)) {
701      prefix_hits->push_back(prefix);
702      if (prefix_miss_cache_.count(prefix) > 0)
703        ++miss_count;
704    }
705  }
706
707  // If all the prefixes are cached as 'misses', don't issue a GetHash.
708  if (miss_count == prefix_hits->size())
709    return false;
710
711  // Find the matching full-hash results.  |full_browse_hashes_| are from the
712  // database, |pending_browse_hashes_| are from GetHash requests between
713  // updates.
714  std::sort(prefix_hits->begin(), prefix_hits->end());
715
716  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
717                               full_hits, last_update);
718  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
719                               full_hits, last_update);
720  return true;
721}
722
723bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
724    const std::vector<GURL>& urls,
725    std::vector<SBPrefix>* prefix_hits) {
726  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
727
728  // Ignore this check when download checking is not enabled.
729  if (!download_store_.get())
730    return false;
731
732  std::vector<SBPrefix> prefixes;
733  GetDownloadUrlPrefixes(urls, &prefixes);
734  return MatchAddPrefixes(download_store_.get(),
735                          safe_browsing_util::BINURL % 2,
736                          prefixes,
737                          prefix_hits);
738}
739
740bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
741  // This method is theoretically thread-safe but we expect all calls to
742  // originate from the IO thread.
743  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
744  std::vector<SBFullHash> full_hashes;
745  BrowseFullHashesToCheck(url, true, &full_hashes);
746  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
747}
748
749bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
750  std::vector<SBFullHash> full_hashes;
751  BrowseFullHashesToCheck(url, true, &full_hashes);
752  return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
753}
754
755bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
756    const std::vector<SBPrefix>& prefixes,
757    std::vector<SBPrefix>* prefix_hits) {
758  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
759  if (!extension_blacklist_store_)
760    return false;
761
762  return MatchAddPrefixes(extension_blacklist_store_.get(),
763                          safe_browsing_util::EXTENSIONBLACKLIST % 2,
764                          prefixes,
765                          prefix_hits);
766}
767
768bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
769    const GURL& url) {
770  std::string host;
771  std::string path;
772  std::string query;
773  safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
774  std::string url_to_check = host + path;
775  if (!query.empty())
776    url_to_check +=  "?" + query;
777  SBFullHash full_hash = SBFullHashForString(url_to_check);
778
779  // This function can be called on any thread, so lock against any changes
780  base::AutoLock locked(lookup_lock_);
781
782  // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
783  // from disk, or the first update populates it.  Bail out without a hit if
784  // not yet available.
785  if (!side_effect_free_whitelist_prefix_set_.get())
786    return false;
787
788  return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix);
789}
790
791bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
792  net::IPAddressNumber ip_number;
793  if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
794    DVLOG(2) << "Unable to parse IP address: '" << ip_address << "'";
795    return false;
796  }
797  if (ip_number.size() == net::kIPv4AddressSize) {
798    ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
799  }
800  if (ip_number.size() != net::kIPv6AddressSize) {
801    DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
802             << ip_address << "'";
803    return false;  // better safe than sorry.
804  }
805  // This function can be called from any thread.
806  base::AutoLock locked(lookup_lock_);
807  for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
808       it != ip_blacklist_.end();
809       ++it) {
810    const std::string& mask = it->first;
811    DCHECK_EQ(mask.size(), ip_number.size());
812    std::string subnet(net::kIPv6AddressSize, '\0');
813    for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
814      subnet[i] = ip_number[i] & mask[i];
815    }
816    const std::string hash = base::SHA1HashString(subnet);
817    DVLOG(2) << "Lookup Malware IP: "
818             << " ip:" << ip_address
819             << " mask:" << base::HexEncode(mask.data(), mask.size())
820             << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
821             << " hash:" << base::HexEncode(hash.data(), hash.size());
822    if (it->second.count(hash) > 0) {
823      return true;
824    }
825  }
826  return false;
827}
828
829bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
830    const std::string& str) {
831  std::vector<SBFullHash> hashes;
832  hashes.push_back(SBFullHashForString(str));
833  return ContainsWhitelistedHashes(download_whitelist_, hashes);
834}
835
836bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
837    const SBWhitelist& whitelist,
838    const std::vector<SBFullHash>& hashes) {
839  base::AutoLock l(lookup_lock_);
840  if (whitelist.second)
841    return true;
842  for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
843       it != hashes.end(); ++it) {
844    if (std::binary_search(whitelist.first.begin(), whitelist.first.end(),
845                           *it, SBFullHashLess)) {
846      return true;
847    }
848  }
849  return false;
850}
851
852// Helper to insert entries for all of the prefixes or full hashes in
853// |entry| into the store.
854void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
855                                        const SBEntry* entry, int list_id) {
856  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
857
858  SafeBrowsingStore* store = GetStore(list_id);
859  if (!store) return;
860
861  STATS_COUNTER("SB.HostInsert", 1);
862  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
863  const int count = entry->prefix_count();
864
865  DCHECK(!entry->IsSub());
866  if (!count) {
867    // No prefixes, use host instead.
868    STATS_COUNTER("SB.PrefixAdd", 1);
869    store->WriteAddPrefix(encoded_chunk_id, host);
870  } else if (entry->IsPrefix()) {
871    // Prefixes only.
872    for (int i = 0; i < count; i++) {
873      const SBPrefix prefix = entry->PrefixAt(i);
874      STATS_COUNTER("SB.PrefixAdd", 1);
875      store->WriteAddPrefix(encoded_chunk_id, prefix);
876    }
877  } else {
878    // Prefixes and hashes.
879    const base::Time receive_time = base::Time::Now();
880    for (int i = 0; i < count; ++i) {
881      const SBFullHash full_hash = entry->FullHashAt(i);
882      const SBPrefix prefix = full_hash.prefix;
883
884      STATS_COUNTER("SB.PrefixAdd", 1);
885      store->WriteAddPrefix(encoded_chunk_id, prefix);
886
887      STATS_COUNTER("SB.PrefixAddFull", 1);
888      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
889    }
890  }
891}
892
893// Helper to iterate over all the entries in the hosts in |chunks| and
894// add them to the store.
895void SafeBrowsingDatabaseNew::InsertAddChunks(
896    const safe_browsing_util::ListType list_id,
897    const SBChunkList& chunks) {
898  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
899
900  SafeBrowsingStore* store = GetStore(list_id);
901  if (!store) return;
902
903  for (SBChunkList::const_iterator citer = chunks.begin();
904       citer != chunks.end(); ++citer) {
905    const int chunk_id = citer->chunk_number;
906
907    // The server can give us a chunk that we already have because
908    // it's part of a range.  Don't add it again.
909    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
910    if (store->CheckAddChunk(encoded_chunk_id))
911      continue;
912
913    store->SetAddChunk(encoded_chunk_id);
914    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
915         hiter != citer->hosts.end(); ++hiter) {
916      // NOTE: Could pass |encoded_chunk_id|, but then inserting add
917      // chunks would look different from inserting sub chunks.
918      InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
919    }
920  }
921}
922
923// Helper to insert entries for all of the prefixes or full hashes in
924// |entry| into the store.
925void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
926                                        const SBEntry* entry, int list_id) {
927  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
928
929  SafeBrowsingStore* store = GetStore(list_id);
930  if (!store) return;
931
932  STATS_COUNTER("SB.HostDelete", 1);
933  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
934  const int count = entry->prefix_count();
935
936  DCHECK(entry->IsSub());
937  if (!count) {
938    // No prefixes, use host instead.
939    STATS_COUNTER("SB.PrefixSub", 1);
940    const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
941    store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
942  } else if (entry->IsPrefix()) {
943    // Prefixes only.
944    for (int i = 0; i < count; i++) {
945      const SBPrefix prefix = entry->PrefixAt(i);
946      const int add_chunk_id =
947          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
948
949      STATS_COUNTER("SB.PrefixSub", 1);
950      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
951    }
952  } else {
953    // Prefixes and hashes.
954    for (int i = 0; i < count; ++i) {
955      const SBFullHash full_hash = entry->FullHashAt(i);
956      const int add_chunk_id =
957          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
958
959      STATS_COUNTER("SB.PrefixSub", 1);
960      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
961
962      STATS_COUNTER("SB.PrefixSubFull", 1);
963      store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
964    }
965  }
966}
967
968// Helper to iterate over all the entries in the hosts in |chunks| and
969// add them to the store.
970void SafeBrowsingDatabaseNew::InsertSubChunks(
971    safe_browsing_util::ListType list_id,
972    const SBChunkList& chunks) {
973  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
974
975  SafeBrowsingStore* store = GetStore(list_id);
976  if (!store) return;
977
978  for (SBChunkList::const_iterator citer = chunks.begin();
979       citer != chunks.end(); ++citer) {
980    const int chunk_id = citer->chunk_number;
981
982    // The server can give us a chunk that we already have because
983    // it's part of a range.  Don't add it again.
984    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
985    if (store->CheckSubChunk(encoded_chunk_id))
986      continue;
987
988    store->SetSubChunk(encoded_chunk_id);
989    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
990         hiter != citer->hosts.end(); ++hiter) {
991      InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
992    }
993  }
994}
995
996void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
997                                           const SBChunkList& chunks) {
998  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
999
1000  if (corruption_detected_ || chunks.empty())
1001    return;
1002
1003  const base::TimeTicks before = base::TimeTicks::Now();
1004
1005  const safe_browsing_util::ListType list_id =
1006      safe_browsing_util::GetListId(list_name);
1007  DVLOG(2) << list_name << ": " << list_id;
1008
1009  SafeBrowsingStore* store = GetStore(list_id);
1010  if (!store) return;
1011
1012  change_detected_ = true;
1013
1014  store->BeginChunk();
1015  if (chunks.front().is_add) {
1016    InsertAddChunks(list_id, chunks);
1017  } else {
1018    InsertSubChunks(list_id, chunks);
1019  }
1020  store->FinishChunk();
1021
1022  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
1023}
1024
1025void SafeBrowsingDatabaseNew::DeleteChunks(
1026    const std::vector<SBChunkDelete>& chunk_deletes) {
1027  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1028
1029  if (corruption_detected_ || chunk_deletes.empty())
1030    return;
1031
1032  const std::string& list_name = chunk_deletes.front().list_name;
1033  const safe_browsing_util::ListType list_id =
1034      safe_browsing_util::GetListId(list_name);
1035
1036  SafeBrowsingStore* store = GetStore(list_id);
1037  if (!store) return;
1038
1039  change_detected_ = true;
1040
1041  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
1042    std::vector<int> chunk_numbers;
1043    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
1044    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
1045      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
1046      if (chunk_deletes[i].is_sub_del)
1047        store->DeleteSubChunk(encoded_chunk_id);
1048      else
1049        store->DeleteAddChunk(encoded_chunk_id);
1050    }
1051  }
1052}
1053
1054void SafeBrowsingDatabaseNew::CacheHashResults(
1055    const std::vector<SBPrefix>& prefixes,
1056    const std::vector<SBFullHashResult>& full_hits) {
1057  // This is called on the I/O thread, lock against updates.
1058  base::AutoLock locked(lookup_lock_);
1059
1060  if (full_hits.empty()) {
1061    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
1062    return;
1063  }
1064
1065  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
1066  // Refactor to make them identical.
1067  const base::Time now = base::Time::Now();
1068  const size_t orig_size = pending_browse_hashes_.size();
1069  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
1070       iter != full_hits.end(); ++iter) {
1071    const int list_id = safe_browsing_util::GetListId(iter->list_name);
1072    if (list_id == safe_browsing_util::MALWARE ||
1073        list_id == safe_browsing_util::PHISH) {
1074      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1075      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1076      pending_browse_hashes_.push_back(add_full_hash);
1077    }
1078  }
1079
1080  // Sort new entries then merge with the previously-sorted entries.
1081  std::vector<SBAddFullHash>::iterator
1082      orig_end = pending_browse_hashes_.begin() + orig_size;
1083  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1084  std::inplace_merge(pending_browse_hashes_.begin(),
1085                     orig_end, pending_browse_hashes_.end(),
1086                     SBAddFullHashPrefixLess);
1087}
1088
1089bool SafeBrowsingDatabaseNew::UpdateStarted(
1090    std::vector<SBListChunkRanges>* lists) {
1091  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1092  DCHECK(lists);
1093
1094  // If |BeginUpdate()| fails, reset the database.
1095  if (!browse_store_->BeginUpdate()) {
1096    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
1097    HandleCorruptDatabase();
1098    return false;
1099  }
1100
1101  if (download_store_.get() && !download_store_->BeginUpdate()) {
1102    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
1103    HandleCorruptDatabase();
1104    return false;
1105  }
1106
1107  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
1108    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1109    HandleCorruptDatabase();
1110    return false;
1111  }
1112
1113  if (download_whitelist_store_.get() &&
1114      !download_whitelist_store_->BeginUpdate()) {
1115    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
1116    HandleCorruptDatabase();
1117    return false;
1118  }
1119
1120  if (extension_blacklist_store_ &&
1121      !extension_blacklist_store_->BeginUpdate()) {
1122    RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
1123    HandleCorruptDatabase();
1124    return false;
1125  }
1126
1127  if (side_effect_free_whitelist_store_ &&
1128      !side_effect_free_whitelist_store_->BeginUpdate()) {
1129    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
1130    HandleCorruptDatabase();
1131    return false;
1132  }
1133
1134  if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1135    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1136    HandleCorruptDatabase();
1137    return false;
1138  }
1139
1140  UpdateChunkRangesForLists(browse_store_.get(),
1141                            safe_browsing_util::kMalwareList,
1142                            safe_browsing_util::kPhishingList,
1143                            lists);
1144
1145  // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1146  // deprecated.  Code to delete the list from the store shows ~15k hits/day as
1147  // of Feb 2014, so it has been removed.  Everything _should_ be resilient to
1148  // extra data of that sort.
1149  UpdateChunkRangesForList(download_store_.get(),
1150                           safe_browsing_util::kBinUrlList, lists);
1151
1152  UpdateChunkRangesForList(csd_whitelist_store_.get(),
1153                           safe_browsing_util::kCsdWhiteList, lists);
1154
1155  UpdateChunkRangesForList(download_whitelist_store_.get(),
1156                           safe_browsing_util::kDownloadWhiteList, lists);
1157
1158  UpdateChunkRangesForList(extension_blacklist_store_.get(),
1159                           safe_browsing_util::kExtensionBlacklist, lists);
1160
1161  UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(),
1162                           safe_browsing_util::kSideEffectFreeWhitelist, lists);
1163
1164  UpdateChunkRangesForList(ip_blacklist_store_.get(),
1165                           safe_browsing_util::kIPBlacklist, lists);
1166
1167  corruption_detected_ = false;
1168  change_detected_ = false;
1169  return true;
1170}
1171
1172void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1173  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1174
1175  // The update may have failed due to corrupt storage (for instance,
1176  // an excessive number of invalid add_chunks and sub_chunks).
1177  // Double-check that the databases are valid.
1178  // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
1179  // sections would allow throwing a corruption error in
1180  // UpdateStarted().
1181  if (!update_succeeded) {
1182    if (!browse_store_->CheckValidity())
1183      DLOG(ERROR) << "Safe-browsing browse database corrupt.";
1184
1185    if (download_store_.get() && !download_store_->CheckValidity())
1186      DLOG(ERROR) << "Safe-browsing download database corrupt.";
1187
1188    if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
1189      DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
1190
1191    if (download_whitelist_store_.get() &&
1192        !download_whitelist_store_->CheckValidity()) {
1193      DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
1194    }
1195
1196    if (extension_blacklist_store_ &&
1197        !extension_blacklist_store_->CheckValidity()) {
1198      DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
1199    }
1200
1201    if (side_effect_free_whitelist_store_ &&
1202        !side_effect_free_whitelist_store_->CheckValidity()) {
1203      DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
1204                  << "corrupt.";
1205    }
1206
1207    if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
1208      DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
1209    }
1210  }
1211
1212  if (corruption_detected_)
1213    return;
1214
1215  // Unroll the transaction if there was a protocol error or if the
1216  // transaction was empty.  This will leave the prefix set, the
1217  // pending hashes, and the prefix miss cache in place.
1218  if (!update_succeeded || !change_detected_) {
1219    // Track empty updates to answer questions at http://crbug.com/72216 .
1220    if (update_succeeded && !change_detected_)
1221      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1222    browse_store_->CancelUpdate();
1223    if (download_store_.get())
1224      download_store_->CancelUpdate();
1225    if (csd_whitelist_store_.get())
1226      csd_whitelist_store_->CancelUpdate();
1227    if (download_whitelist_store_.get())
1228      download_whitelist_store_->CancelUpdate();
1229    if (extension_blacklist_store_)
1230      extension_blacklist_store_->CancelUpdate();
1231    if (side_effect_free_whitelist_store_)
1232      side_effect_free_whitelist_store_->CancelUpdate();
1233    if (ip_blacklist_store_)
1234      ip_blacklist_store_->CancelUpdate();
1235    return;
1236  }
1237
1238  if (download_store_) {
1239    int64 size_bytes = UpdateHashPrefixStore(
1240        download_filename_,
1241        download_store_.get(),
1242        FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1243    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1244                         static_cast<int>(size_bytes / 1024));
1245  }
1246
1247  UpdateBrowseStore();
1248  UpdateWhitelistStore(csd_whitelist_filename_,
1249                       csd_whitelist_store_.get(),
1250                       &csd_whitelist_);
1251  UpdateWhitelistStore(download_whitelist_filename_,
1252                       download_whitelist_store_.get(),
1253                       &download_whitelist_);
1254
1255  if (extension_blacklist_store_) {
1256    int64 size_bytes = UpdateHashPrefixStore(
1257        extension_blacklist_filename_,
1258        extension_blacklist_store_.get(),
1259        FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
1260    UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
1261                         static_cast<int>(size_bytes / 1024));
1262  }
1263
1264  if (side_effect_free_whitelist_store_)
1265    UpdateSideEffectFreeWhitelistStore();
1266
1267  if (ip_blacklist_store_)
1268    UpdateIpBlacklistStore();
1269}
1270
1271void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1272    const base::FilePath& store_filename,
1273    SafeBrowsingStore* store,
1274    SBWhitelist* whitelist) {
1275  if (!store)
1276    return;
1277
1278  // For the whitelists, we don't cache and save full hashes since all
1279  // hashes are already full.
1280  std::vector<SBAddFullHash> empty_add_hashes;
1281
1282  // Note: prefixes will not be empty.  The current data store implementation
1283  // stores all full-length hashes as both full and prefix hashes.
1284  SBAddPrefixes prefixes;
1285  std::vector<SBAddFullHash> full_hashes;
1286  if (!store->FinishUpdate(empty_add_hashes, &prefixes, &full_hashes)) {
1287    RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1288    WhitelistEverything(whitelist);
1289    return;
1290  }
1291
1292#if defined(OS_MACOSX)
1293  base::mac::SetFileBackupExclusion(store_filename);
1294#endif
1295
1296  LoadWhitelist(full_hashes, whitelist);
1297}
1298
1299int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1300    const base::FilePath& store_filename,
1301    SafeBrowsingStore* store,
1302    FailureType failure_type) {
1303  // We don't cache and save full hashes.
1304  std::vector<SBAddFullHash> empty_add_hashes;
1305
1306  // These results are not used after this call. Simply ignore the
1307  // returned value after FinishUpdate(...).
1308  SBAddPrefixes add_prefixes_result;
1309  std::vector<SBAddFullHash> add_full_hashes_result;
1310
1311  if (!store->FinishUpdate(empty_add_hashes,
1312                           &add_prefixes_result,
1313                           &add_full_hashes_result)) {
1314    RecordFailure(failure_type);
1315  }
1316
1317#if defined(OS_MACOSX)
1318  base::mac::SetFileBackupExclusion(store_filename);
1319#endif
1320
1321  return GetFileSizeOrZero(store_filename);
1322}
1323
1324void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1325  // Copy out the pending add hashes.  Copy rather than swapping in
1326  // case |ContainsBrowseURL()| is called before the new filter is complete.
1327  std::vector<SBAddFullHash> pending_add_hashes;
1328  {
1329    base::AutoLock locked(lookup_lock_);
1330    pending_add_hashes.insert(pending_add_hashes.end(),
1331                              pending_browse_hashes_.begin(),
1332                              pending_browse_hashes_.end());
1333  }
1334
1335  // Measure the amount of IO during the filter build.
1336  base::IoCounters io_before, io_after;
1337  base::ProcessHandle handle = base::Process::Current().handle();
1338  scoped_ptr<base::ProcessMetrics> metric(
1339#if !defined(OS_MACOSX)
1340      base::ProcessMetrics::CreateProcessMetrics(handle)
1341#else
1342      // Getting stats only for the current process is enough, so NULL is fine.
1343      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1344#endif
1345  );
1346
1347  // IoCounters are currently not supported on Mac, and may not be
1348  // available for Linux, so we check the result and only show IO
1349  // stats if they are available.
1350  const bool got_counters = metric->GetIOCounters(&io_before);
1351
1352  const base::TimeTicks before = base::TimeTicks::Now();
1353
1354  SBAddPrefixes add_prefixes;
1355  std::vector<SBAddFullHash> add_full_hashes;
1356  if (!browse_store_->FinishUpdate(pending_add_hashes,
1357                                   &add_prefixes, &add_full_hashes)) {
1358    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1359    return;
1360  }
1361
1362  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1363  // could be passed directly to |PrefixSet()|, removing the need for
1364  // |prefixes|.  For now, |prefixes| is useful while debugging
1365  // things.
1366  std::vector<SBPrefix> prefixes;
1367  prefixes.reserve(add_prefixes.size());
1368  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1369       iter != add_prefixes.end(); ++iter) {
1370    prefixes.push_back(iter->prefix);
1371  }
1372
1373  std::sort(prefixes.begin(), prefixes.end());
1374  scoped_ptr<safe_browsing::PrefixSet>
1375      prefix_set(new safe_browsing::PrefixSet(prefixes));
1376
1377  // This needs to be in sorted order by prefix for efficient access.
1378  std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1379            SBAddFullHashPrefixLess);
1380
1381  // Swap in the newly built filter and cache.
1382  {
1383    base::AutoLock locked(lookup_lock_);
1384    full_browse_hashes_.swap(add_full_hashes);
1385
1386    // TODO(shess): If |CacheHashResults()| is posted between the
1387    // earlier lock and this clear, those pending hashes will be lost.
1388    // It could be fixed by only removing hashes which were collected
1389    // at the earlier point.  I believe that is fail-safe as-is (the
1390    // hash will be fetched again).
1391    pending_browse_hashes_.clear();
1392    prefix_miss_cache_.clear();
1393    browse_prefix_set_.swap(prefix_set);
1394  }
1395
1396  DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1397           << (base::TimeTicks::Now() - before).InMilliseconds()
1398           << " ms total.  prefix count: " << add_prefixes.size();
1399  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1400
1401  // Persist the prefix set to disk.  Since only this thread changes
1402  // |browse_prefix_set_|, there is no need to lock.
1403  WritePrefixSet();
1404
1405  // Gather statistics.
1406  if (got_counters && metric->GetIOCounters(&io_after)) {
1407    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1408                         static_cast<int>(io_after.ReadTransferCount -
1409                                          io_before.ReadTransferCount) / 1024);
1410    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1411                         static_cast<int>(io_after.WriteTransferCount -
1412                                          io_before.WriteTransferCount) / 1024);
1413    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1414                         static_cast<int>(io_after.ReadOperationCount -
1415                                          io_before.ReadOperationCount));
1416    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1417                         static_cast<int>(io_after.WriteOperationCount -
1418                                          io_before.WriteOperationCount));
1419  }
1420
1421  int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
1422  UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1423                       static_cast<int>(file_size / 1024));
1424  file_size = GetFileSizeOrZero(browse_filename_);
1425  UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1426                       static_cast<int>(file_size / 1024));
1427
1428#if defined(OS_MACOSX)
1429  base::mac::SetFileBackupExclusion(browse_filename_);
1430#endif
1431}
1432
1433void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1434  std::vector<SBAddFullHash> empty_add_hashes;
1435  SBAddPrefixes add_prefixes;
1436  std::vector<SBAddFullHash> add_full_hashes_result;
1437
1438  if (!side_effect_free_whitelist_store_->FinishUpdate(
1439          empty_add_hashes,
1440          &add_prefixes,
1441          &add_full_hashes_result)) {
1442    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1443    return;
1444  }
1445
1446  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1447  // could be passed directly to |PrefixSet()|, removing the need for
1448  // |prefixes|.  For now, |prefixes| is useful while debugging
1449  // things.
1450  std::vector<SBPrefix> prefixes;
1451  prefixes.reserve(add_prefixes.size());
1452  for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1453       iter != add_prefixes.end(); ++iter) {
1454    prefixes.push_back(iter->prefix);
1455  }
1456
1457  std::sort(prefixes.begin(), prefixes.end());
1458  scoped_ptr<safe_browsing::PrefixSet>
1459      prefix_set(new safe_browsing::PrefixSet(prefixes));
1460
1461  // Swap in the newly built prefix set.
1462  {
1463    base::AutoLock locked(lookup_lock_);
1464    side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1465  }
1466
1467  const base::TimeTicks before = base::TimeTicks::Now();
1468  const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
1469      side_effect_free_whitelist_prefix_set_filename_);
1470  DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
1471           << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
1472           << " ms";
1473  UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
1474                      base::TimeTicks::Now() - before);
1475
1476  if (!write_ok)
1477    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
1478
1479  // Gather statistics.
1480  int64 file_size = GetFileSizeOrZero(
1481      side_effect_free_whitelist_prefix_set_filename_);
1482  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
1483                       static_cast<int>(file_size / 1024));
1484  file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
1485  UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
1486                       static_cast<int>(file_size / 1024));
1487
1488#if defined(OS_MACOSX)
1489  base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1490  base::mac::SetFileBackupExclusion(
1491      side_effect_free_whitelist_prefix_set_filename_);
1492#endif
1493}
1494
1495void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1496  // For the IP blacklist, we don't cache and save full hashes since all
1497  // hashes are already full.
1498  std::vector<SBAddFullHash> empty_add_hashes;
1499
1500  // Note: prefixes will not be empty.  The current data store implementation
1501  // stores all full-length hashes as both full and prefix hashes.
1502  SBAddPrefixes prefixes;
1503  std::vector<SBAddFullHash> full_hashes;
1504  if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes,
1505                                         &prefixes, &full_hashes)) {
1506    RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1507    LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
1508    return;
1509  }
1510
1511#if defined(OS_MACOSX)
1512  base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1513#endif
1514
1515  LoadIpBlacklist(full_hashes);
1516}
1517
1518void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1519  // Reset the database after the current task has unwound (but only
1520  // reset once within the scope of a given task).
1521  if (!reset_factory_.HasWeakPtrs()) {
1522    RecordFailure(FAILURE_DATABASE_CORRUPT);
1523    base::MessageLoop::current()->PostTask(FROM_HERE,
1524        base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1525                   reset_factory_.GetWeakPtr()));
1526  }
1527}
1528
// Task posted by HandleCorruptDatabase(): records that the corruption
// handler ran, flags the database as corrupt so that InsertChunks(),
// DeleteChunks() and UpdateFinished() stop doing work, and resets the
// database.
void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
  corruption_detected_ = true;  // Stop updating the database.
  ResetDatabase();
  // Flag the event as a fatal log message at DLOG level.
  DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
}
1535
1536// TODO(shess): I'm not clear why this code doesn't have any
1537// real error-handling.
1538void SafeBrowsingDatabaseNew::LoadPrefixSet() {
1539  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1540  DCHECK(!browse_prefix_set_filename_.empty());
1541
1542  // If there is no database, the filter cannot be used.
1543  base::File::Info db_info;
1544  if (!base::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1545    return;
1546
1547  // Cleanup any stale bloom filter (no longer used).
1548  // TODO(shess): Track failure to delete?
1549  base::FilePath bloom_filter_filename =
1550      BloomFilterForFilename(browse_filename_);
1551  base::DeleteFile(bloom_filter_filename, false);
1552
1553  const base::TimeTicks before = base::TimeTicks::Now();
1554  browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(
1555      browse_prefix_set_filename_));
1556  DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1557           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1558  UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1559
1560  if (!browse_prefix_set_.get())
1561    RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
1562}
1563
1564bool SafeBrowsingDatabaseNew::Delete() {
1565  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1566
1567  const bool r1 = browse_store_->Delete();
1568  if (!r1)
1569    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1570
1571  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1572  if (!r2)
1573    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1574
1575  const bool r3 = csd_whitelist_store_.get() ?
1576      csd_whitelist_store_->Delete() : true;
1577  if (!r3)
1578    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1579
1580  const bool r4 = download_whitelist_store_.get() ?
1581      download_whitelist_store_->Delete() : true;
1582  if (!r4)
1583    RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1584
1585  base::FilePath bloom_filter_filename =
1586      BloomFilterForFilename(browse_filename_);
1587  const bool r5 = base::DeleteFile(bloom_filter_filename, false);
1588  if (!r5)
1589    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1590
1591  const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
1592  if (!r6)
1593    RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
1594
1595  const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
1596  if (!r7)
1597    RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
1598
1599  const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
1600                                    false);
1601  if (!r8)
1602    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
1603
1604  const bool r9 = base::DeleteFile(
1605      side_effect_free_whitelist_prefix_set_filename_,
1606      false);
1607  if (!r9)
1608    RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
1609
1610  const bool r10 = base::DeleteFile(ip_blacklist_filename_, false);
1611  if (!r10)
1612    RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
1613
1614  return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
1615}
1616
1617void SafeBrowsingDatabaseNew::WritePrefixSet() {
1618  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1619
1620  if (!browse_prefix_set_.get())
1621    return;
1622
1623  const base::TimeTicks before = base::TimeTicks::Now();
1624  const bool write_ok = browse_prefix_set_->WriteFile(
1625      browse_prefix_set_filename_);
1626  DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1627           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1628  UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1629
1630  if (!write_ok)
1631    RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
1632
1633#if defined(OS_MACOSX)
1634  base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
1635#endif
1636}
1637
1638void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1639  base::AutoLock locked(lookup_lock_);
1640  whitelist->second = true;
1641  whitelist->first.clear();
1642}
1643
1644void SafeBrowsingDatabaseNew::LoadWhitelist(
1645    const std::vector<SBAddFullHash>& full_hashes,
1646    SBWhitelist* whitelist) {
1647  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1648  if (full_hashes.size() > kMaxWhitelistSize) {
1649    WhitelistEverything(whitelist);
1650    return;
1651  }
1652
1653  std::vector<SBFullHash> new_whitelist;
1654  new_whitelist.reserve(full_hashes.size());
1655  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1656       it != full_hashes.end(); ++it) {
1657    new_whitelist.push_back(it->full_hash);
1658  }
1659  std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess);
1660
1661  SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl);
1662  if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1663                         kill_switch, SBFullHashLess)) {
1664    // The kill switch is whitelisted hence we whitelist all URLs.
1665    WhitelistEverything(whitelist);
1666  } else {
1667    base::AutoLock locked(lookup_lock_);
1668    whitelist->second = false;
1669    whitelist->first.swap(new_whitelist);
1670  }
1671}
1672
1673void SafeBrowsingDatabaseNew::LoadIpBlacklist(
1674    const std::vector<SBAddFullHash>& full_hashes) {
1675  DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1676  IPBlacklist new_blacklist;
1677  DVLOG(2) << "Writing IP blacklist of size: " << full_hashes.size();
1678  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1679       it != full_hashes.end();
1680       ++it) {
1681    const char* full_hash = it->full_hash.full_hash;
1682    DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
1683    // The format of the IP blacklist is:
1684    // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
1685    std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
1686    size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
1687    if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
1688      DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size;
1689      RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
1690      new_blacklist.clear();  // Load empty blacklist.
1691      break;
1692    }
1693
1694    // We precompute the mask for the given subnet size to speed up lookups.
1695    // Basically we need to create a 16B long string which has the highest
1696    // |size| bits sets to one.
1697    std::string mask(net::kIPv6AddressSize, '\0');
1698    mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
1699    if ((prefix_size % 8) != 0) {
1700      mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
1701    }
1702    DVLOG(2) << "Inserting malicious IP: "
1703             << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
1704             << " mask:" << base::HexEncode(mask.data(), mask.size())
1705             << " prefix_size:" << prefix_size
1706             << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
1707                                                 hashed_ip_prefix.size());
1708    new_blacklist[mask].insert(hashed_ip_prefix);
1709  }
1710
1711  base::AutoLock locked(lookup_lock_);
1712  ip_blacklist_.swap(new_blacklist);
1713}
1714
1715bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1716  SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1717  std::vector<SBFullHash> full_hashes;
1718  full_hashes.push_back(malware_kill_switch);
1719  return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1720}
1721