safe_browsing_database.cc revision f8ee788a64d60abd8f2d742a5fdedde054ecd910
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_database.h"

#include <algorithm>
#include <iterator>

#include "base/bind.h"
#include "base/file_util.h"
#include "base/message_loop/message_loop.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/process/process.h"
#include "base/process/process_metrics.h"
#include "base/sha1.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/sha2.h"
#include "net/base/net_util.h"
#include "url/gurl.h"

#if defined(OS_MACOSX)
#include "base/mac/mac_util.h"
#endif

using content::BrowserThread;

namespace {

// Each suffix below is appended to a base filename by the matching
// SafeBrowsingDatabase::*Filename() helper later in this file.

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for the csd malware IP blacklist store.
const base::FilePath::CharType kIPBlacklistDBFile[] =
    FILE_PATH_LITERAL(" IP Blacklist");

// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit. If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// NOTE(review): presumably the inclusive bounds, in bits, on the prefix
// size accepted for IP blacklist entries (128 matches the width of an IPv6
// address). Their use is not visible in this part of the file -- confirm
// against the IP blacklist loading code.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;

// To save space, the incoming |chunk_id| and |list_id| are combined
// into an |encoded_chunk_id| for storage by shifting the |list_id|
// into the low-order bits. These functions decode that information.
91// TODO(lzheng): It was reasonable when database is saved in sqlite, but 92// there should be better ways to save chunk_id and list_id after we use 93// SafeBrowsingStoreFile. 94int GetListIdBit(const int encoded_chunk_id) { 95 return encoded_chunk_id & 1; 96} 97int DecodeChunkId(int encoded_chunk_id) { 98 return encoded_chunk_id >> 1; 99} 100int EncodeChunkId(const int chunk, const int list_id) { 101 DCHECK_NE(list_id, safe_browsing_util::INVALID); 102 return chunk << 1 | list_id % 2; 103} 104 105// Generate the set of full hashes to check for |url|. If 106// |include_whitelist_hashes| is true we will generate additional path-prefixes 107// to match against the csd whitelist. E.g., if the path-prefix /foo is on the 108// whitelist it should also match /foo/bar which is not the case for all the 109// other lists. We'll also always add a pattern for the empty path. 110// TODO(shess): This function is almost the same as 111// |CompareFullHashes()| in safe_browsing_util.cc, except that code 112// does an early exit on match. Since match should be the infrequent 113// case (phishing or malware found), consider combining this function 114// with that one. 115void BrowseFullHashesToCheck(const GURL& url, 116 bool include_whitelist_hashes, 117 std::vector<SBFullHash>* full_hashes) { 118 std::vector<std::string> hosts; 119 if (url.HostIsIPAddress()) { 120 hosts.push_back(url.host()); 121 } else { 122 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 123 } 124 125 std::vector<std::string> paths; 126 safe_browsing_util::GeneratePathsToCheck(url, &paths); 127 128 for (size_t i = 0; i < hosts.size(); ++i) { 129 for (size_t j = 0; j < paths.size(); ++j) { 130 const std::string& path = paths[j]; 131 full_hashes->push_back(SBFullHashForString(hosts[i] + path)); 132 133 // We may have /foo as path-prefix in the whitelist which should 134 // also match with /foo/bar and /foo?bar. Hence, for every path 135 // that ends in '/' we also add the path without the slash. 
136 if (include_whitelist_hashes && 137 path.size() > 1 && 138 path[path.size() - 1] == '/') { 139 full_hashes->push_back( 140 SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1))); 141 } 142 } 143 } 144} 145 146// Get the prefixes matching the download |urls|. 147void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, 148 std::vector<SBPrefix>* prefixes) { 149 std::vector<SBFullHash> full_hashes; 150 for (size_t i = 0; i < urls.size(); ++i) 151 BrowseFullHashesToCheck(urls[i], false, &full_hashes); 152 153 for (size_t i = 0; i < full_hashes.size(); ++i) 154 prefixes->push_back(full_hashes[i].prefix); 155} 156 157// Helper function to compare addprefixes in |store| with |prefixes|. 158// The |list_bit| indicates which list (url or hash) to compare. 159// 160// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain 161// the actual matching prefixes. 162bool MatchAddPrefixes(SafeBrowsingStore* store, 163 int list_bit, 164 const std::vector<SBPrefix>& prefixes, 165 std::vector<SBPrefix>* prefix_hits) { 166 prefix_hits->clear(); 167 bool found_match = false; 168 169 SBAddPrefixes add_prefixes; 170 store->GetAddPrefixes(&add_prefixes); 171 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 172 iter != add_prefixes.end(); ++iter) { 173 for (size_t j = 0; j < prefixes.size(); ++j) { 174 const SBPrefix& prefix = prefixes[j]; 175 if (prefix == iter->prefix && 176 GetListIdBit(iter->chunk_id) == list_bit) { 177 prefix_hits->push_back(prefix); 178 found_match = true; 179 } 180 } 181 } 182 return found_match; 183} 184 185// Find the entries in |full_hashes| with prefix in |prefix_hits|, and 186// add them to |full_hits| if not expired. "Not expired" is when 187// either |last_update| was recent enough, or the item has been 188// received recently enough. Expired items are not deleted because a 189// future update may make them acceptable again. 
190// 191// For efficiency reasons the code walks |prefix_hits| and 192// |full_hashes| in parallel, so they must be sorted by prefix. 193void GetCachedFullHashesForBrowse( 194 const std::vector<SBPrefix>& prefix_hits, 195 const std::vector<SBFullHashCached>& full_hashes, 196 std::vector<SBFullHashResult>* full_hits) { 197 const base::Time now = base::Time::Now(); 198 199 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin(); 200 std::vector<SBFullHashCached>::const_iterator hiter = full_hashes.begin(); 201 202 while (piter != prefix_hits.end() && hiter != full_hashes.end()) { 203 if (*piter < hiter->hash.prefix) { 204 ++piter; 205 } else if (hiter->hash.prefix < *piter) { 206 ++hiter; 207 } else { 208 if (now <= hiter->expire_after) { 209 SBFullHashResult result; 210 result.list_id = hiter->list_id; 211 result.hash = hiter->hash; 212 full_hits->push_back(result); 213 } 214 215 // Only increment |hiter|, |piter| might have multiple hits. 216 ++hiter; 217 } 218 } 219} 220 221// This function generates a chunk range string for |chunks|. It 222// outputs one chunk range string per list and writes it to the 223// |list_ranges| vector. We expect |list_ranges| to already be of the 224// right size. E.g., if |chunks| contains chunks with two different 225// list ids then |list_ranges| must contain two elements. 226void GetChunkRanges(const std::vector<int>& chunks, 227 std::vector<std::string>* list_ranges) { 228 // Since there are 2 possible list ids, there must be exactly two 229 // list ranges. Even if the chunk data should only contain one 230 // line, this code has to somehow handle corruption. 
231 DCHECK_EQ(2U, list_ranges->size()); 232 233 std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); 234 for (std::vector<int>::const_iterator iter = chunks.begin(); 235 iter != chunks.end(); ++iter) { 236 int mod_list_id = GetListIdBit(*iter); 237 DCHECK_GE(mod_list_id, 0); 238 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); 239 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); 240 } 241 for (size_t i = 0; i < decoded_chunks.size(); ++i) { 242 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); 243 } 244} 245 246// Helper function to create chunk range lists for Browse related 247// lists. 248void UpdateChunkRanges(SafeBrowsingStore* store, 249 const std::vector<std::string>& listnames, 250 std::vector<SBListChunkRanges>* lists) { 251 if (!store) 252 return; 253 254 DCHECK_GT(listnames.size(), 0U); 255 DCHECK_LE(listnames.size(), 2U); 256 std::vector<int> add_chunks; 257 std::vector<int> sub_chunks; 258 store->GetAddChunks(&add_chunks); 259 store->GetSubChunks(&sub_chunks); 260 261 // Always decode 2 ranges, even if only the first one is expected. 262 // The loop below will only load as many into |lists| as |listnames| 263 // indicates. 
264 std::vector<std::string> adds(2); 265 std::vector<std::string> subs(2); 266 GetChunkRanges(add_chunks, &adds); 267 GetChunkRanges(sub_chunks, &subs); 268 269 for (size_t i = 0; i < listnames.size(); ++i) { 270 const std::string& listname = listnames[i]; 271 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, 272 static_cast<int>(i % 2)); 273 DCHECK_NE(safe_browsing_util::GetListId(listname), 274 safe_browsing_util::INVALID); 275 lists->push_back(SBListChunkRanges(listname)); 276 lists->back().adds.swap(adds[i]); 277 lists->back().subs.swap(subs[i]); 278 } 279} 280 281void UpdateChunkRangesForLists(SafeBrowsingStore* store, 282 const std::string& listname0, 283 const std::string& listname1, 284 std::vector<SBListChunkRanges>* lists) { 285 std::vector<std::string> listnames; 286 listnames.push_back(listname0); 287 listnames.push_back(listname1); 288 UpdateChunkRanges(store, listnames, lists); 289} 290 291void UpdateChunkRangesForList(SafeBrowsingStore* store, 292 const std::string& listname, 293 std::vector<SBListChunkRanges>* lists) { 294 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists); 295} 296 297// Order |SBFullHashCached| items on the prefix part. 298bool SBFullHashCachedPrefixLess(const SBFullHashCached& a, 299 const SBFullHashCached& b) { 300 return a.hash.prefix < b.hash.prefix; 301} 302 303// This code always checks for non-zero file size. This helper makes 304// that less verbose. 305int64 GetFileSizeOrZero(const base::FilePath& file_path) { 306 int64 size_64; 307 if (!base::GetFileSize(file_path, &size_64)) 308 return 0; 309 return size_64; 310} 311 312} // namespace 313 314// The default SafeBrowsingDatabaseFactory. 
315class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 316 public: 317 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 318 bool enable_download_protection, 319 bool enable_client_side_whitelist, 320 bool enable_download_whitelist, 321 bool enable_extension_blacklist, 322 bool enable_side_effect_free_whitelist, 323 bool enable_ip_blacklist) OVERRIDE { 324 return new SafeBrowsingDatabaseNew( 325 new SafeBrowsingStoreFile, 326 enable_download_protection ? new SafeBrowsingStoreFile : NULL, 327 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL, 328 enable_download_whitelist ? new SafeBrowsingStoreFile : NULL, 329 enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL, 330 enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL, 331 enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL); 332 } 333 334 SafeBrowsingDatabaseFactoryImpl() { } 335 336 private: 337 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl); 338}; 339 340// static 341SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; 342 343// Factory method, non-thread safe. Caller has to make sure this s called 344// on SafeBrowsing Thread. 345// TODO(shess): There's no need for a factory any longer. Convert 346// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() 347// callers just construct things directly. 
348SafeBrowsingDatabase* SafeBrowsingDatabase::Create( 349 bool enable_download_protection, 350 bool enable_client_side_whitelist, 351 bool enable_download_whitelist, 352 bool enable_extension_blacklist, 353 bool enable_side_effect_free_whitelist, 354 bool enable_ip_blacklist) { 355 if (!factory_) 356 factory_ = new SafeBrowsingDatabaseFactoryImpl(); 357 return factory_->CreateSafeBrowsingDatabase( 358 enable_download_protection, 359 enable_client_side_whitelist, 360 enable_download_whitelist, 361 enable_extension_blacklist, 362 enable_side_effect_free_whitelist, 363 enable_ip_blacklist); 364} 365 366SafeBrowsingDatabase::~SafeBrowsingDatabase() { 367} 368 369// static 370base::FilePath SafeBrowsingDatabase::BrowseDBFilename( 371 const base::FilePath& db_base_filename) { 372 return base::FilePath(db_base_filename.value() + kBrowseDBFile); 373} 374 375// static 376base::FilePath SafeBrowsingDatabase::DownloadDBFilename( 377 const base::FilePath& db_base_filename) { 378 return base::FilePath(db_base_filename.value() + kDownloadDBFile); 379} 380 381// static 382base::FilePath SafeBrowsingDatabase::BloomFilterForFilename( 383 const base::FilePath& db_filename) { 384 return base::FilePath(db_filename.value() + kBloomFilterFile); 385} 386 387// static 388base::FilePath SafeBrowsingDatabase::PrefixSetForFilename( 389 const base::FilePath& db_filename) { 390 return base::FilePath(db_filename.value() + kPrefixSetFile); 391} 392 393// static 394base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( 395 const base::FilePath& db_filename) { 396 return base::FilePath(db_filename.value() + kCsdWhitelistDBFile); 397} 398 399// static 400base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename( 401 const base::FilePath& db_filename) { 402 return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile); 403} 404 405// static 406base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename( 407 const base::FilePath& db_filename) { 408 return 
base::FilePath(db_filename.value() + kExtensionBlacklistDBFile); 409} 410 411// static 412base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename( 413 const base::FilePath& db_filename) { 414 return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile); 415} 416 417// static 418base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename( 419 const base::FilePath& db_filename) { 420 return base::FilePath(db_filename.value() + kIPBlacklistDBFile); 421} 422 423SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { 424 if (list_id == safe_browsing_util::PHISH || 425 list_id == safe_browsing_util::MALWARE) { 426 return browse_store_.get(); 427 } else if (list_id == safe_browsing_util::BINURL) { 428 return download_store_.get(); 429 } else if (list_id == safe_browsing_util::CSDWHITELIST) { 430 return csd_whitelist_store_.get(); 431 } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) { 432 return download_whitelist_store_.get(); 433 } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) { 434 return extension_blacklist_store_.get(); 435 } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) { 436 return side_effect_free_whitelist_store_.get(); 437 } else if (list_id == safe_browsing_util::IPBLACKLIST) { 438 return ip_blacklist_store_.get(); 439 } 440 return NULL; 441} 442 443// static 444void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { 445 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, 446 FAILURE_DATABASE_MAX); 447} 448 449SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() 450 : creation_loop_(base::MessageLoop::current()), 451 browse_store_(new SafeBrowsingStoreFile), 452 reset_factory_(this), 453 corruption_detected_(false), 454 change_detected_(false) { 455 DCHECK(browse_store_.get()); 456 DCHECK(!download_store_.get()); 457 DCHECK(!csd_whitelist_store_.get()); 458 DCHECK(!download_whitelist_store_.get()); 459 DCHECK(!extension_blacklist_store_.get()); 460 
DCHECK(!side_effect_free_whitelist_store_.get()); 461 DCHECK(!ip_blacklist_store_.get()); 462} 463 464SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( 465 SafeBrowsingStore* browse_store, 466 SafeBrowsingStore* download_store, 467 SafeBrowsingStore* csd_whitelist_store, 468 SafeBrowsingStore* download_whitelist_store, 469 SafeBrowsingStore* extension_blacklist_store, 470 SafeBrowsingStore* side_effect_free_whitelist_store, 471 SafeBrowsingStore* ip_blacklist_store) 472 : creation_loop_(base::MessageLoop::current()), 473 browse_store_(browse_store), 474 download_store_(download_store), 475 csd_whitelist_store_(csd_whitelist_store), 476 download_whitelist_store_(download_whitelist_store), 477 extension_blacklist_store_(extension_blacklist_store), 478 side_effect_free_whitelist_store_(side_effect_free_whitelist_store), 479 ip_blacklist_store_(ip_blacklist_store), 480 reset_factory_(this), 481 corruption_detected_(false) { 482 DCHECK(browse_store_.get()); 483} 484 485SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { 486 // The DCHECK is disabled due to crbug.com/338486 . 487 // DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 488} 489 490void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) { 491 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 492 // Ensure we haven't been run before. 
493 DCHECK(browse_filename_.empty()); 494 DCHECK(download_filename_.empty()); 495 DCHECK(csd_whitelist_filename_.empty()); 496 DCHECK(download_whitelist_filename_.empty()); 497 DCHECK(extension_blacklist_filename_.empty()); 498 DCHECK(side_effect_free_whitelist_filename_.empty()); 499 DCHECK(ip_blacklist_filename_.empty()); 500 501 browse_filename_ = BrowseDBFilename(filename_base); 502 browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_); 503 504 browse_store_->Init( 505 browse_filename_, 506 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 507 base::Unretained(this))); 508 509 { 510 // NOTE: There is no need to grab the lock in this function, since 511 // until it returns, there are no pointers to this class on other 512 // threads. Then again, that means there is no possibility of 513 // contention on the lock... 514 base::AutoLock locked(lookup_lock_); 515 cached_browse_hashes_.clear(); 516 LoadPrefixSet(); 517 } 518 519 if (download_store_.get()) { 520 download_filename_ = DownloadDBFilename(filename_base); 521 download_store_->Init( 522 download_filename_, 523 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 524 base::Unretained(this))); 525 } 526 527 if (csd_whitelist_store_.get()) { 528 csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base); 529 csd_whitelist_store_->Init( 530 csd_whitelist_filename_, 531 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 532 base::Unretained(this))); 533 534 std::vector<SBAddFullHash> full_hashes; 535 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { 536 LoadWhitelist(full_hashes, &csd_whitelist_); 537 } else { 538 WhitelistEverything(&csd_whitelist_); 539 } 540 } else { 541 WhitelistEverything(&csd_whitelist_); // Just to be safe. 
542 } 543 544 if (download_whitelist_store_.get()) { 545 download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base); 546 download_whitelist_store_->Init( 547 download_whitelist_filename_, 548 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 549 base::Unretained(this))); 550 551 std::vector<SBAddFullHash> full_hashes; 552 if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) { 553 LoadWhitelist(full_hashes, &download_whitelist_); 554 } else { 555 WhitelistEverything(&download_whitelist_); 556 } 557 } else { 558 WhitelistEverything(&download_whitelist_); // Just to be safe. 559 } 560 561 if (extension_blacklist_store_.get()) { 562 extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base); 563 extension_blacklist_store_->Init( 564 extension_blacklist_filename_, 565 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 566 base::Unretained(this))); 567 } 568 569 if (side_effect_free_whitelist_store_.get()) { 570 side_effect_free_whitelist_filename_ = 571 SideEffectFreeWhitelistDBFilename(filename_base); 572 side_effect_free_whitelist_prefix_set_filename_ = 573 PrefixSetForFilename(side_effect_free_whitelist_filename_); 574 side_effect_free_whitelist_store_->Init( 575 side_effect_free_whitelist_filename_, 576 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 577 base::Unretained(this))); 578 579 // If there is no database, the filter cannot be used. 
580 base::File::Info db_info; 581 if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info) 582 && db_info.size != 0) { 583 const base::TimeTicks before = base::TimeTicks::Now(); 584 side_effect_free_whitelist_prefix_set_ = 585 safe_browsing::PrefixSet::LoadFile( 586 side_effect_free_whitelist_prefix_set_filename_); 587 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad", 588 base::TimeTicks::Now() - before); 589 if (!side_effect_free_whitelist_prefix_set_.get()) 590 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ); 591 } 592 } else { 593 // Delete any files of the side-effect free sidelist that may be around 594 // from when it was previously enabled. 595 SafeBrowsingStoreFile::DeleteStore( 596 SideEffectFreeWhitelistDBFilename(filename_base)); 597 } 598 599 if (ip_blacklist_store_.get()) { 600 ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base); 601 ip_blacklist_store_->Init( 602 ip_blacklist_filename_, 603 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 604 base::Unretained(this))); 605 606 std::vector<SBAddFullHash> full_hashes; 607 if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) { 608 LoadIpBlacklist(full_hashes); 609 } else { 610 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 611 } 612 } 613} 614 615bool SafeBrowsingDatabaseNew::ResetDatabase() { 616 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 617 618 // Delete files on disk. 619 // TODO(shess): Hard to see where one might want to delete without a 620 // reset. Perhaps inline |Delete()|? 621 if (!Delete()) 622 return false; 623 624 // Reset objects in memory. 625 { 626 base::AutoLock locked(lookup_lock_); 627 cached_browse_hashes_.clear(); 628 prefix_miss_cache_.clear(); 629 browse_prefix_set_.reset(); 630 side_effect_free_whitelist_prefix_set_.reset(); 631 ip_blacklist_.clear(); 632 } 633 // Wants to acquire the lock itself. 
634 WhitelistEverything(&csd_whitelist_); 635 WhitelistEverything(&download_whitelist_); 636 return true; 637} 638 639bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 640 const GURL& url, 641 std::vector<SBPrefix>* prefix_hits, 642 std::vector<SBFullHashResult>* cache_hits) { 643 // Clear the results first. 644 prefix_hits->clear(); 645 cache_hits->clear(); 646 647 std::vector<SBFullHash> full_hashes; 648 BrowseFullHashesToCheck(url, false, &full_hashes); 649 if (full_hashes.empty()) 650 return false; 651 652 // This function is called on the I/O thread, prevent changes to 653 // filter and caches. 654 base::AutoLock locked(lookup_lock_); 655 656 // |browse_prefix_set_| is empty until it is either read from disk, or the 657 // first update populates it. Bail out without a hit if not yet 658 // available. 659 if (!browse_prefix_set_.get()) 660 return false; 661 662 size_t miss_count = 0; 663 for (size_t i = 0; i < full_hashes.size(); ++i) { 664 if (browse_prefix_set_->Exists(full_hashes[i])) { 665 const SBPrefix prefix = full_hashes[i].prefix; 666 prefix_hits->push_back(prefix); 667 if (prefix_miss_cache_.count(prefix) > 0) 668 ++miss_count; 669 } 670 } 671 672 // If all the prefixes are cached as 'misses', don't issue a GetHash. 673 if (miss_count == prefix_hits->size()) 674 return false; 675 676 // Find matching cached gethash responses. 677 std::sort(prefix_hits->begin(), prefix_hits->end()); 678 GetCachedFullHashesForBrowse(*prefix_hits, cached_browse_hashes_, cache_hits); 679 680 return true; 681} 682 683bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 684 const std::vector<GURL>& urls, 685 std::vector<SBPrefix>* prefix_hits) { 686 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 687 688 // Ignore this check when download checking is not enabled. 
689 if (!download_store_.get()) 690 return false; 691 692 std::vector<SBPrefix> prefixes; 693 GetDownloadUrlPrefixes(urls, &prefixes); 694 return MatchAddPrefixes(download_store_.get(), 695 safe_browsing_util::BINURL % 2, 696 prefixes, 697 prefix_hits); 698} 699 700bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { 701 // This method is theoretically thread-safe but we expect all calls to 702 // originate from the IO thread. 703 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 704 std::vector<SBFullHash> full_hashes; 705 BrowseFullHashesToCheck(url, true, &full_hashes); 706 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 707} 708 709bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) { 710 std::vector<SBFullHash> full_hashes; 711 BrowseFullHashesToCheck(url, true, &full_hashes); 712 return ContainsWhitelistedHashes(download_whitelist_, full_hashes); 713} 714 715bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes( 716 const std::vector<SBPrefix>& prefixes, 717 std::vector<SBPrefix>* prefix_hits) { 718 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 719 if (!extension_blacklist_store_) 720 return false; 721 722 return MatchAddPrefixes(extension_blacklist_store_.get(), 723 safe_browsing_util::EXTENSIONBLACKLIST % 2, 724 prefixes, 725 prefix_hits); 726} 727 728bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl( 729 const GURL& url) { 730 std::string host; 731 std::string path; 732 std::string query; 733 safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query); 734 std::string url_to_check = host + path; 735 if (!query.empty()) 736 url_to_check += "?" 
+ query; 737 SBFullHash full_hash = SBFullHashForString(url_to_check); 738 739 // This function can be called on any thread, so lock against any changes 740 base::AutoLock locked(lookup_lock_); 741 742 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read 743 // from disk, or the first update populates it. Bail out without a hit if 744 // not yet available. 745 if (!side_effect_free_whitelist_prefix_set_.get()) 746 return false; 747 748 return side_effect_free_whitelist_prefix_set_->Exists(full_hash); 749} 750 751bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) { 752 net::IPAddressNumber ip_number; 753 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) 754 return false; 755 if (ip_number.size() == net::kIPv4AddressSize) 756 ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number); 757 if (ip_number.size() != net::kIPv6AddressSize) 758 return false; // better safe than sorry. 759 760 // This function can be called from any thread. 
761 base::AutoLock locked(lookup_lock_); 762 for (IPBlacklist::const_iterator it = ip_blacklist_.begin(); 763 it != ip_blacklist_.end(); 764 ++it) { 765 const std::string& mask = it->first; 766 DCHECK_EQ(mask.size(), ip_number.size()); 767 std::string subnet(net::kIPv6AddressSize, '\0'); 768 for (size_t i = 0; i < net::kIPv6AddressSize; ++i) { 769 subnet[i] = ip_number[i] & mask[i]; 770 } 771 const std::string hash = base::SHA1HashString(subnet); 772 DVLOG(2) << "Lookup Malware IP: " 773 << " ip:" << ip_address 774 << " mask:" << base::HexEncode(mask.data(), mask.size()) 775 << " subnet:" << base::HexEncode(subnet.data(), subnet.size()) 776 << " hash:" << base::HexEncode(hash.data(), hash.size()); 777 if (it->second.count(hash) > 0) { 778 return true; 779 } 780 } 781 return false; 782} 783 784bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString( 785 const std::string& str) { 786 std::vector<SBFullHash> hashes; 787 hashes.push_back(SBFullHashForString(str)); 788 return ContainsWhitelistedHashes(download_whitelist_, hashes); 789} 790 791bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes( 792 const SBWhitelist& whitelist, 793 const std::vector<SBFullHash>& hashes) { 794 base::AutoLock l(lookup_lock_); 795 if (whitelist.second) 796 return true; 797 for (std::vector<SBFullHash>::const_iterator it = hashes.begin(); 798 it != hashes.end(); ++it) { 799 if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), 800 *it, SBFullHashLess)) { 801 return true; 802 } 803 } 804 return false; 805} 806 807// Helper to insert add-chunk entries. 808void SafeBrowsingDatabaseNew::InsertAddChunk( 809 SafeBrowsingStore* store, 810 const safe_browsing_util::ListType list_id, 811 const SBChunkData& chunk_data) { 812 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 813 DCHECK(store); 814 815 // The server can give us a chunk that we already have because 816 // it's part of a range. Don't add it again. 
817 const int chunk_id = chunk_data.ChunkNumber(); 818 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 819 if (store->CheckAddChunk(encoded_chunk_id)) 820 return; 821 822 store->SetAddChunk(encoded_chunk_id); 823 if (chunk_data.IsPrefix()) { 824 const size_t c = chunk_data.PrefixCount(); 825 for (size_t i = 0; i < c; ++i) { 826 STATS_COUNTER("SB.PrefixAdd", 1); 827 store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i)); 828 } 829 } else { 830 const size_t c = chunk_data.FullHashCount(); 831 for (size_t i = 0; i < c; ++i) { 832 STATS_COUNTER("SB.PrefixAddFull", 1); 833 store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i)); 834 } 835 } 836} 837 838// Helper to insert sub-chunk entries. 839void SafeBrowsingDatabaseNew::InsertSubChunk( 840 SafeBrowsingStore* store, 841 const safe_browsing_util::ListType list_id, 842 const SBChunkData& chunk_data) { 843 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 844 DCHECK(store); 845 846 // The server can give us a chunk that we already have because 847 // it's part of a range. Don't add it again. 
848 const int chunk_id = chunk_data.ChunkNumber(); 849 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 850 if (store->CheckSubChunk(encoded_chunk_id)) 851 return; 852 853 store->SetSubChunk(encoded_chunk_id); 854 if (chunk_data.IsPrefix()) { 855 const size_t c = chunk_data.PrefixCount(); 856 for (size_t i = 0; i < c; ++i) { 857 STATS_COUNTER("SB.PrefixSub", 1); 858 const int add_chunk_id = chunk_data.AddChunkNumberAt(i); 859 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); 860 store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id, 861 chunk_data.PrefixAt(i)); 862 } 863 } else { 864 const size_t c = chunk_data.FullHashCount(); 865 for (size_t i = 0; i < c; ++i) { 866 STATS_COUNTER("SB.PrefixSubFull", 1); 867 const int add_chunk_id = chunk_data.AddChunkNumberAt(i); 868 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); 869 store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id, 870 chunk_data.FullHashAt(i)); 871 } 872 } 873} 874 875void SafeBrowsingDatabaseNew::InsertChunks( 876 const std::string& list_name, 877 const std::vector<SBChunkData*>& chunks) { 878 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 879 880 if (corruption_detected_ || chunks.empty()) 881 return; 882 883 const base::TimeTicks before = base::TimeTicks::Now(); 884 885 // TODO(shess): The caller should just pass list_id. 886 const safe_browsing_util::ListType list_id = 887 safe_browsing_util::GetListId(list_name); 888 889 SafeBrowsingStore* store = GetStore(list_id); 890 if (!store) return; 891 892 change_detected_ = true; 893 894 // TODO(shess): I believe that the list is always add or sub. Can this use 895 // that productively? 
896 store->BeginChunk(); 897 for (size_t i = 0; i < chunks.size(); ++i) { 898 if (chunks[i]->IsAdd()) { 899 InsertAddChunk(store, list_id, *chunks[i]); 900 } else if (chunks[i]->IsSub()) { 901 InsertSubChunk(store, list_id, *chunks[i]); 902 } else { 903 NOTREACHED(); 904 } 905 } 906 store->FinishChunk(); 907 908 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before); 909} 910 911void SafeBrowsingDatabaseNew::DeleteChunks( 912 const std::vector<SBChunkDelete>& chunk_deletes) { 913 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 914 915 if (corruption_detected_ || chunk_deletes.empty()) 916 return; 917 918 const std::string& list_name = chunk_deletes.front().list_name; 919 const safe_browsing_util::ListType list_id = 920 safe_browsing_util::GetListId(list_name); 921 922 SafeBrowsingStore* store = GetStore(list_id); 923 if (!store) return; 924 925 change_detected_ = true; 926 927 for (size_t i = 0; i < chunk_deletes.size(); ++i) { 928 std::vector<int> chunk_numbers; 929 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); 930 for (size_t j = 0; j < chunk_numbers.size(); ++j) { 931 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); 932 if (chunk_deletes[i].is_sub_del) 933 store->DeleteSubChunk(encoded_chunk_id); 934 else 935 store->DeleteAddChunk(encoded_chunk_id); 936 } 937 } 938} 939 940void SafeBrowsingDatabaseNew::CacheHashResults( 941 const std::vector<SBPrefix>& prefixes, 942 const std::vector<SBFullHashResult>& full_hits, 943 const base::TimeDelta& cache_lifetime) { 944 const base::Time expire_after = base::Time::Now() + cache_lifetime; 945 946 // This is called on the I/O thread, lock against updates. 
947 base::AutoLock locked(lookup_lock_); 948 949 if (full_hits.empty()) { 950 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 951 return; 952 } 953 954 const size_t orig_size = cached_browse_hashes_.size(); 955 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 956 iter != full_hits.end(); ++iter) { 957 if (iter->list_id == safe_browsing_util::MALWARE || 958 iter->list_id == safe_browsing_util::PHISH) { 959 SBFullHashCached cached_hash; 960 cached_hash.hash = iter->hash; 961 cached_hash.list_id = iter->list_id; 962 cached_hash.expire_after = expire_after; 963 cached_browse_hashes_.push_back(cached_hash); 964 } 965 } 966 967 // Sort new entries then merge with the previously-sorted entries. 968 std::vector<SBFullHashCached>::iterator 969 orig_end = cached_browse_hashes_.begin() + orig_size; 970 std::sort(orig_end, cached_browse_hashes_.end(), SBFullHashCachedPrefixLess); 971 std::inplace_merge(cached_browse_hashes_.begin(), 972 orig_end, cached_browse_hashes_.end(), 973 SBFullHashCachedPrefixLess); 974} 975 976bool SafeBrowsingDatabaseNew::UpdateStarted( 977 std::vector<SBListChunkRanges>* lists) { 978 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 979 DCHECK(lists); 980 981 // If |BeginUpdate()| fails, reset the database. 
982 if (!browse_store_->BeginUpdate()) { 983 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 984 HandleCorruptDatabase(); 985 return false; 986 } 987 988 if (download_store_.get() && !download_store_->BeginUpdate()) { 989 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); 990 HandleCorruptDatabase(); 991 return false; 992 } 993 994 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { 995 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 996 HandleCorruptDatabase(); 997 return false; 998 } 999 1000 if (download_whitelist_store_.get() && 1001 !download_whitelist_store_->BeginUpdate()) { 1002 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 1003 HandleCorruptDatabase(); 1004 return false; 1005 } 1006 1007 if (extension_blacklist_store_ && 1008 !extension_blacklist_store_->BeginUpdate()) { 1009 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN); 1010 HandleCorruptDatabase(); 1011 return false; 1012 } 1013 1014 if (side_effect_free_whitelist_store_ && 1015 !side_effect_free_whitelist_store_->BeginUpdate()) { 1016 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN); 1017 HandleCorruptDatabase(); 1018 return false; 1019 } 1020 1021 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { 1022 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); 1023 HandleCorruptDatabase(); 1024 return false; 1025 } 1026 1027 UpdateChunkRangesForLists(browse_store_.get(), 1028 safe_browsing_util::kMalwareList, 1029 safe_browsing_util::kPhishingList, 1030 lists); 1031 1032 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been 1033 // deprecated. Code to delete the list from the store shows ~15k hits/day as 1034 // of Feb 2014, so it has been removed. Everything _should_ be resilient to 1035 // extra data of that sort. 
1036 UpdateChunkRangesForList(download_store_.get(), 1037 safe_browsing_util::kBinUrlList, lists); 1038 1039 UpdateChunkRangesForList(csd_whitelist_store_.get(), 1040 safe_browsing_util::kCsdWhiteList, lists); 1041 1042 UpdateChunkRangesForList(download_whitelist_store_.get(), 1043 safe_browsing_util::kDownloadWhiteList, lists); 1044 1045 UpdateChunkRangesForList(extension_blacklist_store_.get(), 1046 safe_browsing_util::kExtensionBlacklist, lists); 1047 1048 UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(), 1049 safe_browsing_util::kSideEffectFreeWhitelist, lists); 1050 1051 UpdateChunkRangesForList(ip_blacklist_store_.get(), 1052 safe_browsing_util::kIPBlacklist, lists); 1053 1054 corruption_detected_ = false; 1055 change_detected_ = false; 1056 return true; 1057} 1058 1059void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { 1060 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1061 1062 // The update may have failed due to corrupt storage (for instance, 1063 // an excessive number of invalid add_chunks and sub_chunks). 1064 // Double-check that the databases are valid. 1065 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk 1066 // sections would allow throwing a corruption error in 1067 // UpdateStarted(). 
1068 if (!update_succeeded) { 1069 if (!browse_store_->CheckValidity()) 1070 DLOG(ERROR) << "Safe-browsing browse database corrupt."; 1071 1072 if (download_store_.get() && !download_store_->CheckValidity()) 1073 DLOG(ERROR) << "Safe-browsing download database corrupt."; 1074 1075 if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity()) 1076 DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt."; 1077 1078 if (download_whitelist_store_.get() && 1079 !download_whitelist_store_->CheckValidity()) { 1080 DLOG(ERROR) << "Safe-browsing download whitelist database corrupt."; 1081 } 1082 1083 if (extension_blacklist_store_ && 1084 !extension_blacklist_store_->CheckValidity()) { 1085 DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt."; 1086 } 1087 1088 if (side_effect_free_whitelist_store_ && 1089 !side_effect_free_whitelist_store_->CheckValidity()) { 1090 DLOG(ERROR) << "Safe-browsing side-effect free whitelist database " 1091 << "corrupt."; 1092 } 1093 1094 if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) { 1095 DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt."; 1096 } 1097 } 1098 1099 if (corruption_detected_) 1100 return; 1101 1102 // Unroll the transaction if there was a protocol error or if the 1103 // transaction was empty. This will leave the prefix set, the 1104 // pending hashes, and the prefix miss cache in place. 1105 if (!update_succeeded || !change_detected_) { 1106 // Track empty updates to answer questions at http://crbug.com/72216 . 
1107 if (update_succeeded && !change_detected_) 1108 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0); 1109 browse_store_->CancelUpdate(); 1110 if (download_store_.get()) 1111 download_store_->CancelUpdate(); 1112 if (csd_whitelist_store_.get()) 1113 csd_whitelist_store_->CancelUpdate(); 1114 if (download_whitelist_store_.get()) 1115 download_whitelist_store_->CancelUpdate(); 1116 if (extension_blacklist_store_) 1117 extension_blacklist_store_->CancelUpdate(); 1118 if (side_effect_free_whitelist_store_) 1119 side_effect_free_whitelist_store_->CancelUpdate(); 1120 if (ip_blacklist_store_) 1121 ip_blacklist_store_->CancelUpdate(); 1122 return; 1123 } 1124 1125 if (download_store_) { 1126 int64 size_bytes = UpdateHashPrefixStore( 1127 download_filename_, 1128 download_store_.get(), 1129 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); 1130 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", 1131 static_cast<int>(size_bytes / 1024)); 1132 } 1133 1134 UpdateBrowseStore(); 1135 UpdateWhitelistStore(csd_whitelist_filename_, 1136 csd_whitelist_store_.get(), 1137 &csd_whitelist_); 1138 UpdateWhitelistStore(download_whitelist_filename_, 1139 download_whitelist_store_.get(), 1140 &download_whitelist_); 1141 1142 if (extension_blacklist_store_) { 1143 int64 size_bytes = UpdateHashPrefixStore( 1144 extension_blacklist_filename_, 1145 extension_blacklist_store_.get(), 1146 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH); 1147 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes", 1148 static_cast<int>(size_bytes / 1024)); 1149 } 1150 1151 if (side_effect_free_whitelist_store_) 1152 UpdateSideEffectFreeWhitelistStore(); 1153 1154 if (ip_blacklist_store_) 1155 UpdateIpBlacklistStore(); 1156} 1157 1158void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1159 const base::FilePath& store_filename, 1160 SafeBrowsingStore* store, 1161 SBWhitelist* whitelist) { 1162 if (!store) 1163 return; 1164 1165 // Note: |builder| will not be empty. 
The current data store implementation 1166 // stores all full-length hashes as both full and prefix hashes. 1167 safe_browsing::PrefixSetBuilder builder; 1168 std::vector<SBAddFullHash> full_hashes; 1169 if (!store->FinishUpdate(&builder, &full_hashes)) { 1170 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1171 WhitelistEverything(whitelist); 1172 return; 1173 } 1174 1175#if defined(OS_MACOSX) 1176 base::mac::SetFileBackupExclusion(store_filename); 1177#endif 1178 1179 LoadWhitelist(full_hashes, whitelist); 1180} 1181 1182int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1183 const base::FilePath& store_filename, 1184 SafeBrowsingStore* store, 1185 FailureType failure_type) { 1186 // These results are not used after this call. Simply ignore the 1187 // returned value after FinishUpdate(...). 1188 safe_browsing::PrefixSetBuilder builder; 1189 std::vector<SBAddFullHash> add_full_hashes_result; 1190 1191 if (!store->FinishUpdate(&builder, &add_full_hashes_result)) 1192 RecordFailure(failure_type); 1193 1194#if defined(OS_MACOSX) 1195 base::mac::SetFileBackupExclusion(store_filename); 1196#endif 1197 1198 return GetFileSizeOrZero(store_filename); 1199} 1200 1201void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1202 // Measure the amount of IO during the filter build. 1203 base::IoCounters io_before, io_after; 1204 base::ProcessHandle handle = base::Process::Current().handle(); 1205 scoped_ptr<base::ProcessMetrics> metric( 1206#if !defined(OS_MACOSX) 1207 base::ProcessMetrics::CreateProcessMetrics(handle) 1208#else 1209 // Getting stats only for the current process is enough, so NULL is fine. 1210 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1211#endif 1212 ); 1213 1214 // IoCounters are currently not supported on Mac, and may not be 1215 // available for Linux, so we check the result and only show IO 1216 // stats if they are available. 
1217 const bool got_counters = metric->GetIOCounters(&io_before); 1218 1219 const base::TimeTicks before = base::TimeTicks::Now(); 1220 1221 // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the 1222 // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use 1223 // the SBFullHash portion. It would need an accessor on PrefixSet. 1224 safe_browsing::PrefixSetBuilder builder; 1225 std::vector<SBAddFullHash> add_full_hashes; 1226 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) { 1227 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1228 return; 1229 } 1230 1231 std::vector<SBFullHash> full_hash_results; 1232 for (size_t i = 0; i < add_full_hashes.size(); ++i) { 1233 full_hash_results.push_back(add_full_hashes[i].full_hash); 1234 } 1235 1236 scoped_ptr<safe_browsing::PrefixSet> 1237 prefix_set(builder.GetPrefixSet(full_hash_results)); 1238 1239 // Swap in the newly built filter and cache. 1240 { 1241 base::AutoLock locked(lookup_lock_); 1242 1243 // TODO(shess): If |CacheHashResults()| is posted between the 1244 // earlier lock and this clear, those pending hashes will be lost. 1245 // It could be fixed by only removing hashes which were collected 1246 // at the earlier point. I believe that is fail-safe as-is (the 1247 // hash will be fetched again). 1248 cached_browse_hashes_.clear(); 1249 prefix_miss_cache_.clear(); 1250 browse_prefix_set_.swap(prefix_set); 1251 } 1252 1253 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1254 1255 // Persist the prefix set to disk. Since only this thread changes 1256 // |browse_prefix_set_|, there is no need to lock. 1257 WritePrefixSet(); 1258 1259 // Gather statistics. 
1260 if (got_counters && metric->GetIOCounters(&io_after)) { 1261 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", 1262 static_cast<int>(io_after.ReadTransferCount - 1263 io_before.ReadTransferCount) / 1024); 1264 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", 1265 static_cast<int>(io_after.WriteTransferCount - 1266 io_before.WriteTransferCount) / 1024); 1267 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", 1268 static_cast<int>(io_after.ReadOperationCount - 1269 io_before.ReadOperationCount)); 1270 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", 1271 static_cast<int>(io_after.WriteOperationCount - 1272 io_before.WriteOperationCount)); 1273 } 1274 1275 int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_); 1276 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes", 1277 static_cast<int>(file_size / 1024)); 1278 file_size = GetFileSizeOrZero(browse_filename_); 1279 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1280 static_cast<int>(file_size / 1024)); 1281 1282#if defined(OS_MACOSX) 1283 base::mac::SetFileBackupExclusion(browse_filename_); 1284#endif 1285} 1286 1287void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1288 safe_browsing::PrefixSetBuilder builder; 1289 std::vector<SBAddFullHash> add_full_hashes_result; 1290 1291 if (!side_effect_free_whitelist_store_->FinishUpdate( 1292 &builder, &add_full_hashes_result)) { 1293 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1294 return; 1295 } 1296 scoped_ptr<safe_browsing::PrefixSet> 1297 prefix_set(builder.GetPrefixSetNoHashes()); 1298 1299 // Swap in the newly built prefix set. 
1300 { 1301 base::AutoLock locked(lookup_lock_); 1302 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1303 } 1304 1305 const base::TimeTicks before = base::TimeTicks::Now(); 1306 const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile( 1307 side_effect_free_whitelist_prefix_set_filename_); 1308 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite", 1309 base::TimeTicks::Now() - before); 1310 1311 if (!write_ok) 1312 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE); 1313 1314 // Gather statistics. 1315 int64 file_size = GetFileSizeOrZero( 1316 side_effect_free_whitelist_prefix_set_filename_); 1317 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes", 1318 static_cast<int>(file_size / 1024)); 1319 file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_); 1320 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes", 1321 static_cast<int>(file_size / 1024)); 1322 1323#if defined(OS_MACOSX) 1324 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); 1325 base::mac::SetFileBackupExclusion( 1326 side_effect_free_whitelist_prefix_set_filename_); 1327#endif 1328} 1329 1330void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { 1331 // Note: prefixes will not be empty. The current data store implementation 1332 // stores all full-length hashes as both full and prefix hashes. 1333 safe_browsing::PrefixSetBuilder builder; 1334 std::vector<SBAddFullHash> full_hashes; 1335 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) { 1336 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); 1337 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 
1338 return; 1339 } 1340 1341#if defined(OS_MACOSX) 1342 base::mac::SetFileBackupExclusion(ip_blacklist_filename_); 1343#endif 1344 1345 LoadIpBlacklist(full_hashes); 1346} 1347 1348void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { 1349 // Reset the database after the current task has unwound (but only 1350 // reset once within the scope of a given task). 1351 if (!reset_factory_.HasWeakPtrs()) { 1352 RecordFailure(FAILURE_DATABASE_CORRUPT); 1353 base::MessageLoop::current()->PostTask(FROM_HERE, 1354 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase, 1355 reset_factory_.GetWeakPtr())); 1356 } 1357} 1358 1359void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { 1360 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); 1361 corruption_detected_ = true; // Stop updating the database. 1362 ResetDatabase(); 1363 1364 // NOTE(shess): ResetDatabase() should remove the corruption, so this should 1365 // only happen once. If you are here because you are hitting this after a 1366 // restart, then I would be very interested in working with you to figure out 1367 // what is happening, since it may affect real users. 1368 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset"; 1369} 1370 1371// TODO(shess): I'm not clear why this code doesn't have any 1372// real error-handling. 1373void SafeBrowsingDatabaseNew::LoadPrefixSet() { 1374 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1375 DCHECK(!browse_prefix_set_filename_.empty()); 1376 1377 // If there is no database, the filter cannot be used. 1378 base::File::Info db_info; 1379 if (!base::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0) 1380 return; 1381 1382 // Cleanup any stale bloom filter (no longer used). 1383 // TODO(shess): Track failure to delete? 
1384 base::FilePath bloom_filter_filename = 1385 BloomFilterForFilename(browse_filename_); 1386 base::DeleteFile(bloom_filter_filename, false); 1387 1388 const base::TimeTicks before = base::TimeTicks::Now(); 1389 browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile( 1390 browse_prefix_set_filename_); 1391 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); 1392 1393 if (!browse_prefix_set_.get()) 1394 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ); 1395} 1396 1397bool SafeBrowsingDatabaseNew::Delete() { 1398 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1399 1400 const bool r1 = browse_store_->Delete(); 1401 if (!r1) 1402 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1403 1404 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1405 if (!r2) 1406 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1407 1408 const bool r3 = csd_whitelist_store_.get() ? 1409 csd_whitelist_store_->Delete() : true; 1410 if (!r3) 1411 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1412 1413 const bool r4 = download_whitelist_store_.get() ? 
1414 download_whitelist_store_->Delete() : true; 1415 if (!r4) 1416 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1417 1418 base::FilePath bloom_filter_filename = 1419 BloomFilterForFilename(browse_filename_); 1420 const bool r5 = base::DeleteFile(bloom_filter_filename, false); 1421 if (!r5) 1422 RecordFailure(FAILURE_DATABASE_FILTER_DELETE); 1423 1424 const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false); 1425 if (!r6) 1426 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE); 1427 1428 const bool r7 = base::DeleteFile(extension_blacklist_filename_, false); 1429 if (!r7) 1430 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE); 1431 1432 const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_, 1433 false); 1434 if (!r8) 1435 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE); 1436 1437 const bool r9 = base::DeleteFile( 1438 side_effect_free_whitelist_prefix_set_filename_, 1439 false); 1440 if (!r9) 1441 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE); 1442 1443 const bool r10 = base::DeleteFile(ip_blacklist_filename_, false); 1444 if (!r10) 1445 RecordFailure(FAILURE_IP_BLACKLIST_DELETE); 1446 1447 return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10; 1448} 1449 1450void SafeBrowsingDatabaseNew::WritePrefixSet() { 1451 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1452 1453 if (!browse_prefix_set_.get()) 1454 return; 1455 1456 const base::TimeTicks before = base::TimeTicks::Now(); 1457 const bool write_ok = browse_prefix_set_->WriteFile( 1458 browse_prefix_set_filename_); 1459 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before); 1460 1461 if (!write_ok) 1462 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE); 1463 1464#if defined(OS_MACOSX) 1465 base::mac::SetFileBackupExclusion(browse_prefix_set_filename_); 1466#endif 1467} 1468 1469void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) { 1470 base::AutoLock locked(lookup_lock_); 1471 whitelist->second = 
true; 1472 whitelist->first.clear(); 1473} 1474 1475void SafeBrowsingDatabaseNew::LoadWhitelist( 1476 const std::vector<SBAddFullHash>& full_hashes, 1477 SBWhitelist* whitelist) { 1478 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1479 if (full_hashes.size() > kMaxWhitelistSize) { 1480 WhitelistEverything(whitelist); 1481 return; 1482 } 1483 1484 std::vector<SBFullHash> new_whitelist; 1485 new_whitelist.reserve(full_hashes.size()); 1486 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1487 it != full_hashes.end(); ++it) { 1488 new_whitelist.push_back(it->full_hash); 1489 } 1490 std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess); 1491 1492 SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl); 1493 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), 1494 kill_switch, SBFullHashLess)) { 1495 // The kill switch is whitelisted hence we whitelist all URLs. 1496 WhitelistEverything(whitelist); 1497 } else { 1498 base::AutoLock locked(lookup_lock_); 1499 whitelist->second = false; 1500 whitelist->first.swap(new_whitelist); 1501 } 1502} 1503 1504void SafeBrowsingDatabaseNew::LoadIpBlacklist( 1505 const std::vector<SBAddFullHash>& full_hashes) { 1506 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1507 IPBlacklist new_blacklist; 1508 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1509 it != full_hashes.end(); 1510 ++it) { 1511 const char* full_hash = it->full_hash.full_hash; 1512 DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash)); 1513 // The format of the IP blacklist is: 1514 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes. 
1515 std::string hashed_ip_prefix(full_hash, base::kSHA1Length); 1516 size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]); 1517 if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) { 1518 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID); 1519 new_blacklist.clear(); // Load empty blacklist. 1520 break; 1521 } 1522 1523 // We precompute the mask for the given subnet size to speed up lookups. 1524 // Basically we need to create a 16B long string which has the highest 1525 // |size| bits sets to one. 1526 std::string mask(net::kIPv6AddressSize, '\0'); 1527 mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF'); 1528 if ((prefix_size % 8) != 0) { 1529 mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8)); 1530 } 1531 DVLOG(2) << "Inserting malicious IP: " 1532 << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length) 1533 << " mask:" << base::HexEncode(mask.data(), mask.size()) 1534 << " prefix_size:" << prefix_size 1535 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(), 1536 hashed_ip_prefix.size()); 1537 new_blacklist[mask].insert(hashed_ip_prefix); 1538 } 1539 1540 base::AutoLock locked(lookup_lock_); 1541 ip_blacklist_.swap(new_blacklist); 1542} 1543 1544bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { 1545 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); 1546 std::vector<SBFullHash> full_hashes; 1547 full_hashes.push_back(malware_kill_switch); 1548 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 1549} 1550 1551bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() { 1552 return csd_whitelist_.second; 1553} 1554