// safe_browsing_database.cc revision bbcdd45c55eb7c4641ab97aef9889b0fc828e7d3
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/safe_browsing/safe_browsing_database.h" 6 7#include <algorithm> 8#include <iterator> 9 10#include "base/bind.h" 11#include "base/file_util.h" 12#include "base/message_loop/message_loop.h" 13#include "base/metrics/histogram.h" 14#include "base/metrics/stats_counters.h" 15#include "base/process/process_metrics.h" 16#include "base/time/time.h" 17#include "chrome/browser/safe_browsing/prefix_set.h" 18#include "chrome/browser/safe_browsing/safe_browsing_store_file.h" 19#include "content/public/browser/browser_thread.h" 20#include "crypto/sha2.h" 21#include "url/gurl.h" 22 23#if defined(OS_MACOSX) 24#include "base/mac/mac_util.h" 25#endif 26 27using content::BrowserThread; 28 29namespace { 30 31// Filename suffix for the bloom filter. 32const base::FilePath::CharType kBloomFilterFile[] = 33 FILE_PATH_LITERAL(" Filter 2"); 34// Filename suffix for the prefix set. 35const base::FilePath::CharType kPrefixSetFile[] = 36 FILE_PATH_LITERAL(" Prefix Set"); 37// Filename suffix for download store. 38const base::FilePath::CharType kDownloadDBFile[] = 39 FILE_PATH_LITERAL(" Download"); 40// Filename suffix for client-side phishing detection whitelist store. 41const base::FilePath::CharType kCsdWhitelistDBFile[] = 42 FILE_PATH_LITERAL(" Csd Whitelist"); 43// Filename suffix for the download whitelist store. 44const base::FilePath::CharType kDownloadWhitelistDBFile[] = 45 FILE_PATH_LITERAL(" Download Whitelist"); 46// Filename suffix for the extension blacklist store. 47const base::FilePath::CharType kExtensionBlacklistDBFile[] = 48 FILE_PATH_LITERAL(" Extension Blacklist"); 49// Filename suffix for the side-effect free whitelist store. 
50const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] = 51 FILE_PATH_LITERAL(" Side-Effect Free Whitelist"); 52// Filename suffix for browse store. 53// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. 54// Unfortunately, to change the name implies lots of transition code 55// for little benefit. If/when file formats change (say to put all 56// the data in one file), that would be a convenient point to rectify 57// this. 58const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); 59 60// The maximum staleness for a cached entry. 61const int kMaxStalenessMinutes = 45; 62 63// Maximum number of entries we allow in any of the whitelists. 64// If a whitelist on disk contains more entries then all lookups to 65// the whitelist will be considered a match. 66const size_t kMaxWhitelistSize = 5000; 67 68// If the hash of this exact expression is on a whitelist then all 69// lookups to this whitelist will be considered a match. 70const char kWhitelistKillSwitchUrl[] = 71 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this! 72 73// To save space, the incoming |chunk_id| and |list_id| are combined 74// into an |encoded_chunk_id| for storage by shifting the |list_id| 75// into the low-order bits. These functions decode that information. 76// TODO(lzheng): It was reasonable when database is saved in sqlite, but 77// there should be better ways to save chunk_id and list_id after we use 78// SafeBrowsingStoreFile. 79int GetListIdBit(const int encoded_chunk_id) { 80 return encoded_chunk_id & 1; 81} 82int DecodeChunkId(int encoded_chunk_id) { 83 return encoded_chunk_id >> 1; 84} 85int EncodeChunkId(const int chunk, const int list_id) { 86 DCHECK_NE(list_id, safe_browsing_util::INVALID); 87 return chunk << 1 | list_id % 2; 88} 89 90// Generate the set of full hashes to check for |url|. If 91// |include_whitelist_hashes| is true we will generate additional path-prefixes 92// to match against the csd whitelist. 
E.g., if the path-prefix /foo is on the 93// whitelist it should also match /foo/bar which is not the case for all the 94// other lists. We'll also always add a pattern for the empty path. 95// TODO(shess): This function is almost the same as 96// |CompareFullHashes()| in safe_browsing_util.cc, except that code 97// does an early exit on match. Since match should be the infrequent 98// case (phishing or malware found), consider combining this function 99// with that one. 100void BrowseFullHashesToCheck(const GURL& url, 101 bool include_whitelist_hashes, 102 std::vector<SBFullHash>* full_hashes) { 103 std::vector<std::string> hosts; 104 if (url.HostIsIPAddress()) { 105 hosts.push_back(url.host()); 106 } else { 107 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 108 } 109 110 std::vector<std::string> paths; 111 safe_browsing_util::GeneratePathsToCheck(url, &paths); 112 113 for (size_t i = 0; i < hosts.size(); ++i) { 114 for (size_t j = 0; j < paths.size(); ++j) { 115 const std::string& path = paths[j]; 116 SBFullHash full_hash; 117 crypto::SHA256HashString(hosts[i] + path, &full_hash, 118 sizeof(full_hash)); 119 full_hashes->push_back(full_hash); 120 121 // We may have /foo as path-prefix in the whitelist which should 122 // also match with /foo/bar and /foo?bar. Hence, for every path 123 // that ends in '/' we also add the path without the slash. 124 if (include_whitelist_hashes && 125 path.size() > 1 && 126 path[path.size() - 1] == '/') { 127 crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1), 128 &full_hash, sizeof(full_hash)); 129 full_hashes->push_back(full_hash); 130 } 131 } 132 } 133} 134 135// Get the prefixes matching the download |urls|. 
136void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, 137 std::vector<SBPrefix>* prefixes) { 138 std::vector<SBFullHash> full_hashes; 139 for (size_t i = 0; i < urls.size(); ++i) 140 BrowseFullHashesToCheck(urls[i], false, &full_hashes); 141 142 for (size_t i = 0; i < full_hashes.size(); ++i) 143 prefixes->push_back(full_hashes[i].prefix); 144} 145 146// Helper function to compare addprefixes in |store| with |prefixes|. 147// The |list_bit| indicates which list (url or hash) to compare. 148// 149// Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain 150// the actual matching prefixes. 151bool MatchAddPrefixes(SafeBrowsingStore* store, 152 int list_bit, 153 const std::vector<SBPrefix>& prefixes, 154 std::vector<SBPrefix>* prefix_hits) { 155 prefix_hits->clear(); 156 bool found_match = false; 157 158 SBAddPrefixes add_prefixes; 159 store->GetAddPrefixes(&add_prefixes); 160 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 161 iter != add_prefixes.end(); ++iter) { 162 for (size_t j = 0; j < prefixes.size(); ++j) { 163 const SBPrefix& prefix = prefixes[j]; 164 if (prefix == iter->prefix && 165 GetListIdBit(iter->chunk_id) == list_bit) { 166 prefix_hits->push_back(prefix); 167 found_match = true; 168 } 169 } 170 } 171 return found_match; 172} 173 174// Find the entries in |full_hashes| with prefix in |prefix_hits|, and 175// add them to |full_hits| if not expired. "Not expired" is when 176// either |last_update| was recent enough, or the item has been 177// received recently enough. Expired items are not deleted because a 178// future update may make them acceptable again. 179// 180// For efficiency reasons the code walks |prefix_hits| and 181// |full_hashes| in parallel, so they must be sorted by prefix. 
182void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, 183 const std::vector<SBAddFullHash>& full_hashes, 184 std::vector<SBFullHashResult>* full_hits, 185 base::Time last_update) { 186 const base::Time expire_time = 187 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes); 188 189 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin(); 190 std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin(); 191 192 while (piter != prefix_hits.end() && hiter != full_hashes.end()) { 193 if (*piter < hiter->full_hash.prefix) { 194 ++piter; 195 } else if (hiter->full_hash.prefix < *piter) { 196 ++hiter; 197 } else { 198 if (expire_time < last_update || 199 expire_time.ToTimeT() < hiter->received) { 200 SBFullHashResult result; 201 const int list_bit = GetListIdBit(hiter->chunk_id); 202 DCHECK(list_bit == safe_browsing_util::MALWARE || 203 list_bit == safe_browsing_util::PHISH); 204 const safe_browsing_util::ListType list_id = 205 static_cast<safe_browsing_util::ListType>(list_bit); 206 if (!safe_browsing_util::GetListName(list_id, &result.list_name)) 207 continue; 208 result.add_chunk_id = DecodeChunkId(hiter->chunk_id); 209 result.hash = hiter->full_hash; 210 full_hits->push_back(result); 211 } 212 213 // Only increment |hiter|, |piter| might have multiple hits. 214 ++hiter; 215 } 216 } 217} 218 219// This function generates a chunk range string for |chunks|. It 220// outputs one chunk range string per list and writes it to the 221// |list_ranges| vector. We expect |list_ranges| to already be of the 222// right size. E.g., if |chunks| contains chunks with two different 223// list ids then |list_ranges| must contain two elements. 
224void GetChunkRanges(const std::vector<int>& chunks, 225 std::vector<std::string>* list_ranges) { 226 DCHECK_GT(list_ranges->size(), 0U); 227 DCHECK_LE(list_ranges->size(), 2U); 228 std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); 229 for (std::vector<int>::const_iterator iter = chunks.begin(); 230 iter != chunks.end(); ++iter) { 231 int mod_list_id = GetListIdBit(*iter); 232 DCHECK_GE(mod_list_id, 0); 233 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); 234 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); 235 } 236 for (size_t i = 0; i < decoded_chunks.size(); ++i) { 237 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); 238 } 239} 240 241// Helper function to create chunk range lists for Browse related 242// lists. 243void UpdateChunkRanges(SafeBrowsingStore* store, 244 const std::vector<std::string>& listnames, 245 std::vector<SBListChunkRanges>* lists) { 246 DCHECK_GT(listnames.size(), 0U); 247 DCHECK_LE(listnames.size(), 2U); 248 std::vector<int> add_chunks; 249 std::vector<int> sub_chunks; 250 store->GetAddChunks(&add_chunks); 251 store->GetSubChunks(&sub_chunks); 252 253 std::vector<std::string> adds(listnames.size()); 254 std::vector<std::string> subs(listnames.size()); 255 GetChunkRanges(add_chunks, &adds); 256 GetChunkRanges(sub_chunks, &subs); 257 258 for (size_t i = 0; i < listnames.size(); ++i) { 259 const std::string& listname = listnames[i]; 260 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, 261 static_cast<int>(i % 2)); 262 DCHECK_NE(safe_browsing_util::GetListId(listname), 263 safe_browsing_util::INVALID); 264 lists->push_back(SBListChunkRanges(listname)); 265 lists->back().adds.swap(adds[i]); 266 lists->back().subs.swap(subs[i]); 267 } 268} 269 270// Helper for deleting chunks left over from obsolete lists. 
271void DeleteChunksFromStore(SafeBrowsingStore* store, int listid){ 272 std::vector<int> add_chunks; 273 size_t adds_deleted = 0; 274 store->GetAddChunks(&add_chunks); 275 for (std::vector<int>::const_iterator iter = add_chunks.begin(); 276 iter != add_chunks.end(); ++iter) { 277 if (GetListIdBit(*iter) == GetListIdBit(listid)) { 278 adds_deleted++; 279 store->DeleteAddChunk(*iter); 280 } 281 } 282 if (adds_deleted > 0) 283 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashAddsDeleted", adds_deleted); 284 285 std::vector<int> sub_chunks; 286 size_t subs_deleted = 0; 287 store->GetSubChunks(&sub_chunks); 288 for (std::vector<int>::const_iterator iter = sub_chunks.begin(); 289 iter != sub_chunks.end(); ++iter) { 290 if (GetListIdBit(*iter) == GetListIdBit(listid)) { 291 subs_deleted++; 292 store->DeleteSubChunk(*iter); 293 } 294 } 295 if (subs_deleted > 0) 296 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted); 297} 298 299// Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 300// safe_browsing_store.h orders on both chunk-id and prefix. 301bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 302 return a.full_hash.prefix < b.full_hash.prefix; 303} 304 305// This code always checks for non-zero file size. This helper makes 306// that less verbose. 307int64 GetFileSizeOrZero(const base::FilePath& file_path) { 308 int64 size_64; 309 if (!file_util::GetFileSize(file_path, &size_64)) 310 return 0; 311 return size_64; 312} 313 314} // namespace 315 316// The default SafeBrowsingDatabaseFactory. 
317class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 318 public: 319 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 320 bool enable_download_protection, 321 bool enable_client_side_whitelist, 322 bool enable_download_whitelist, 323 bool enable_extension_blacklist, 324 bool enable_side_effect_free_whitelist) OVERRIDE { 325 return new SafeBrowsingDatabaseNew( 326 new SafeBrowsingStoreFile, 327 enable_download_protection ? new SafeBrowsingStoreFile : NULL, 328 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL, 329 enable_download_whitelist ? new SafeBrowsingStoreFile : NULL, 330 enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL, 331 enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL); 332 } 333 334 SafeBrowsingDatabaseFactoryImpl() { } 335 336 private: 337 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl); 338}; 339 340// static 341SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; 342 343// Factory method, non-thread safe. Caller has to make sure this s called 344// on SafeBrowsing Thread. 345// TODO(shess): There's no need for a factory any longer. Convert 346// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() 347// callers just construct things directly. 
348SafeBrowsingDatabase* SafeBrowsingDatabase::Create( 349 bool enable_download_protection, 350 bool enable_client_side_whitelist, 351 bool enable_download_whitelist, 352 bool enable_extension_blacklist, 353 bool enable_side_effect_free_whitelist) { 354 if (!factory_) 355 factory_ = new SafeBrowsingDatabaseFactoryImpl(); 356 return factory_->CreateSafeBrowsingDatabase( 357 enable_download_protection, 358 enable_client_side_whitelist, 359 enable_download_whitelist, 360 enable_extension_blacklist, 361 enable_side_effect_free_whitelist); 362} 363 364SafeBrowsingDatabase::~SafeBrowsingDatabase() { 365} 366 367// static 368base::FilePath SafeBrowsingDatabase::BrowseDBFilename( 369 const base::FilePath& db_base_filename) { 370 return base::FilePath(db_base_filename.value() + kBrowseDBFile); 371} 372 373// static 374base::FilePath SafeBrowsingDatabase::DownloadDBFilename( 375 const base::FilePath& db_base_filename) { 376 return base::FilePath(db_base_filename.value() + kDownloadDBFile); 377} 378 379// static 380base::FilePath SafeBrowsingDatabase::BloomFilterForFilename( 381 const base::FilePath& db_filename) { 382 return base::FilePath(db_filename.value() + kBloomFilterFile); 383} 384 385// static 386base::FilePath SafeBrowsingDatabase::PrefixSetForFilename( 387 const base::FilePath& db_filename) { 388 return base::FilePath(db_filename.value() + kPrefixSetFile); 389} 390 391// static 392base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( 393 const base::FilePath& db_filename) { 394 return base::FilePath(db_filename.value() + kCsdWhitelistDBFile); 395} 396 397// static 398base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename( 399 const base::FilePath& db_filename) { 400 return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile); 401} 402 403// static 404base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename( 405 const base::FilePath& db_filename) { 406 return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile); 407} 
408 409// static 410base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename( 411 const base::FilePath& db_filename) { 412 return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile); 413} 414 415SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { 416 if (list_id == safe_browsing_util::PHISH || 417 list_id == safe_browsing_util::MALWARE) { 418 return browse_store_.get(); 419 } else if (list_id == safe_browsing_util::BINURL || 420 list_id == safe_browsing_util::BINHASH) { 421 return download_store_.get(); 422 } else if (list_id == safe_browsing_util::CSDWHITELIST) { 423 return csd_whitelist_store_.get(); 424 } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) { 425 return download_whitelist_store_.get(); 426 } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) { 427 return extension_blacklist_store_.get(); 428 } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) { 429 return side_effect_free_whitelist_store_.get(); 430 } 431 return NULL; 432} 433 434// static 435void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { 436 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, 437 FAILURE_DATABASE_MAX); 438} 439 440SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() 441 : creation_loop_(base::MessageLoop::current()), 442 browse_store_(new SafeBrowsingStoreFile), 443 reset_factory_(this), 444 corruption_detected_(false), 445 change_detected_(false) { 446 DCHECK(browse_store_.get()); 447 DCHECK(!download_store_.get()); 448 DCHECK(!csd_whitelist_store_.get()); 449 DCHECK(!download_whitelist_store_.get()); 450 DCHECK(!extension_blacklist_store_.get()); 451 DCHECK(!side_effect_free_whitelist_store_.get()); 452} 453 454SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( 455 SafeBrowsingStore* browse_store, 456 SafeBrowsingStore* download_store, 457 SafeBrowsingStore* csd_whitelist_store, 458 SafeBrowsingStore* download_whitelist_store, 459 SafeBrowsingStore* 
extension_blacklist_store, 460 SafeBrowsingStore* side_effect_free_whitelist_store) 461 : creation_loop_(base::MessageLoop::current()), 462 browse_store_(browse_store), 463 download_store_(download_store), 464 csd_whitelist_store_(csd_whitelist_store), 465 download_whitelist_store_(download_whitelist_store), 466 extension_blacklist_store_(extension_blacklist_store), 467 side_effect_free_whitelist_store_(side_effect_free_whitelist_store), 468 reset_factory_(this), 469 corruption_detected_(false) { 470 DCHECK(browse_store_.get()); 471} 472 473SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { 474 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 475} 476 477void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) { 478 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 479 // Ensure we haven't been run before. 480 DCHECK(browse_filename_.empty()); 481 DCHECK(download_filename_.empty()); 482 DCHECK(csd_whitelist_filename_.empty()); 483 DCHECK(download_whitelist_filename_.empty()); 484 DCHECK(extension_blacklist_filename_.empty()); 485 DCHECK(side_effect_free_whitelist_filename_.empty()); 486 487 browse_filename_ = BrowseDBFilename(filename_base); 488 browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_); 489 490 browse_store_->Init( 491 browse_filename_, 492 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 493 base::Unretained(this))); 494 DVLOG(1) << "Init browse store: " << browse_filename_.value(); 495 496 { 497 // NOTE: There is no need to grab the lock in this function, since 498 // until it returns, there are no pointers to this class on other 499 // threads. Then again, that means there is no possibility of 500 // contention on the lock... 
501 base::AutoLock locked(lookup_lock_); 502 full_browse_hashes_.clear(); 503 pending_browse_hashes_.clear(); 504 LoadPrefixSet(); 505 } 506 507 if (download_store_.get()) { 508 download_filename_ = DownloadDBFilename(filename_base); 509 download_store_->Init( 510 download_filename_, 511 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 512 base::Unretained(this))); 513 DVLOG(1) << "Init download store: " << download_filename_.value(); 514 } 515 516 if (csd_whitelist_store_.get()) { 517 csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base); 518 csd_whitelist_store_->Init( 519 csd_whitelist_filename_, 520 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 521 base::Unretained(this))); 522 DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value(); 523 std::vector<SBAddFullHash> full_hashes; 524 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { 525 LoadWhitelist(full_hashes, &csd_whitelist_); 526 } else { 527 WhitelistEverything(&csd_whitelist_); 528 } 529 } else { 530 WhitelistEverything(&csd_whitelist_); // Just to be safe. 531 } 532 533 if (download_whitelist_store_.get()) { 534 download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base); 535 download_whitelist_store_->Init( 536 download_whitelist_filename_, 537 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 538 base::Unretained(this))); 539 DVLOG(1) << "Init download whitelist store: " 540 << download_whitelist_filename_.value(); 541 std::vector<SBAddFullHash> full_hashes; 542 if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) { 543 LoadWhitelist(full_hashes, &download_whitelist_); 544 } else { 545 WhitelistEverything(&download_whitelist_); 546 } 547 } else { 548 WhitelistEverything(&download_whitelist_); // Just to be safe. 
549 } 550 551 if (extension_blacklist_store_.get()) { 552 extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base); 553 extension_blacklist_store_->Init( 554 extension_blacklist_filename_, 555 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 556 base::Unretained(this))); 557 DVLOG(1) << "Init extension blacklist store: " 558 << extension_blacklist_filename_.value(); 559 } 560 561 if (side_effect_free_whitelist_store_.get()) { 562 side_effect_free_whitelist_filename_ = 563 SideEffectFreeWhitelistDBFilename(filename_base); 564 side_effect_free_whitelist_prefix_set_filename_ = 565 PrefixSetForFilename(side_effect_free_whitelist_filename_); 566 side_effect_free_whitelist_store_->Init( 567 side_effect_free_whitelist_filename_, 568 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 569 base::Unretained(this))); 570 DVLOG(1) << "Init side-effect free whitelist store: " 571 << side_effect_free_whitelist_filename_.value(); 572 573 // If there is no database, the filter cannot be used. 574 base::PlatformFileInfo db_info; 575 if (file_util::GetFileInfo(side_effect_free_whitelist_filename_, &db_info) 576 && db_info.size != 0) { 577 const base::TimeTicks before = base::TimeTicks::Now(); 578 side_effect_free_whitelist_prefix_set_.reset( 579 safe_browsing::PrefixSet::LoadFile( 580 side_effect_free_whitelist_prefix_set_filename_)); 581 DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist " 582 << "prefix set in " 583 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 584 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad", 585 base::TimeTicks::Now() - before); 586 if (!side_effect_free_whitelist_prefix_set_.get()) 587 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ); 588 } 589 } else { 590 // Delete any files of the side-effect free sidelist that may be around 591 // from when it was previously enabled. 
592 SafeBrowsingStoreFile::DeleteStore( 593 SideEffectFreeWhitelistDBFilename(filename_base)); 594 } 595} 596 597bool SafeBrowsingDatabaseNew::ResetDatabase() { 598 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 599 600 // Delete files on disk. 601 // TODO(shess): Hard to see where one might want to delete without a 602 // reset. Perhaps inline |Delete()|? 603 if (!Delete()) 604 return false; 605 606 // Reset objects in memory. 607 { 608 base::AutoLock locked(lookup_lock_); 609 full_browse_hashes_.clear(); 610 pending_browse_hashes_.clear(); 611 prefix_miss_cache_.clear(); 612 browse_prefix_set_.reset(); 613 side_effect_free_whitelist_prefix_set_.reset(); 614 } 615 // Wants to acquire the lock itself. 616 WhitelistEverything(&csd_whitelist_); 617 WhitelistEverything(&download_whitelist_); 618 619 return true; 620} 621 622// TODO(lzheng): Remove matching_list, it is not used anywhere. 623bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 624 const GURL& url, 625 std::string* matching_list, 626 std::vector<SBPrefix>* prefix_hits, 627 std::vector<SBFullHashResult>* full_hits, 628 base::Time last_update) { 629 // Clear the results first. 630 matching_list->clear(); 631 prefix_hits->clear(); 632 full_hits->clear(); 633 634 std::vector<SBFullHash> full_hashes; 635 BrowseFullHashesToCheck(url, false, &full_hashes); 636 if (full_hashes.empty()) 637 return false; 638 639 // This function is called on the I/O thread, prevent changes to 640 // filter and caches. 641 base::AutoLock locked(lookup_lock_); 642 643 // |browse_prefix_set_| is empty until it is either read from disk, or the 644 // first update populates it. Bail out without a hit if not yet 645 // available. 
646 if (!browse_prefix_set_.get()) 647 return false; 648 649 size_t miss_count = 0; 650 for (size_t i = 0; i < full_hashes.size(); ++i) { 651 const SBPrefix prefix = full_hashes[i].prefix; 652 if (browse_prefix_set_->Exists(prefix)) { 653 prefix_hits->push_back(prefix); 654 if (prefix_miss_cache_.count(prefix) > 0) 655 ++miss_count; 656 } 657 } 658 659 // If all the prefixes are cached as 'misses', don't issue a GetHash. 660 if (miss_count == prefix_hits->size()) 661 return false; 662 663 // Find the matching full-hash results. |full_browse_hashes_| are from the 664 // database, |pending_browse_hashes_| are from GetHash requests between 665 // updates. 666 std::sort(prefix_hits->begin(), prefix_hits->end()); 667 668 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, 669 full_hits, last_update); 670 GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_, 671 full_hits, last_update); 672 return true; 673} 674 675bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 676 const std::vector<GURL>& urls, 677 std::vector<SBPrefix>* prefix_hits) { 678 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 679 680 // Ignore this check when download checking is not enabled. 681 if (!download_store_.get()) 682 return false; 683 684 std::vector<SBPrefix> prefixes; 685 GetDownloadUrlPrefixes(urls, &prefixes); 686 return MatchAddPrefixes(download_store_.get(), 687 safe_browsing_util::BINURL % 2, 688 prefixes, 689 prefix_hits); 690} 691 692bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( 693 const SBPrefix& prefix) { 694 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 695 696 // Ignore this check when download store is not available. 
697 if (!download_store_.get()) 698 return false; 699 700 std::vector<SBPrefix> prefix_hits; 701 return MatchAddPrefixes(download_store_.get(), 702 safe_browsing_util::BINHASH % 2, 703 std::vector<SBPrefix>(1, prefix), 704 &prefix_hits); 705} 706 707bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { 708 // This method is theoretically thread-safe but we expect all calls to 709 // originate from the IO thread. 710 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 711 std::vector<SBFullHash> full_hashes; 712 BrowseFullHashesToCheck(url, true, &full_hashes); 713 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 714} 715 716bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) { 717 std::vector<SBFullHash> full_hashes; 718 BrowseFullHashesToCheck(url, true, &full_hashes); 719 return ContainsWhitelistedHashes(download_whitelist_, full_hashes); 720} 721 722bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes( 723 const std::vector<SBPrefix>& prefixes, 724 std::vector<SBPrefix>* prefix_hits) { 725 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 726 if (!extension_blacklist_store_) 727 return false; 728 729 return MatchAddPrefixes(extension_blacklist_store_.get(), 730 safe_browsing_util::EXTENSIONBLACKLIST % 2, 731 prefixes, 732 prefix_hits); 733} 734 735bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl( 736 const GURL& url) { 737 SBFullHash full_hash; 738 std::string host; 739 std::string path; 740 std::string query; 741 safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query); 742 std::string url_to_check = host + path; 743 if (!query.empty()) 744 url_to_check += "?" 
+ query; 745 crypto::SHA256HashString(url_to_check, &full_hash, sizeof(full_hash)); 746 747 // This function can be called on any thread, so lock against any changes 748 base::AutoLock locked(lookup_lock_); 749 750 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read 751 // from disk, or the first update populates it. Bail out without a hit if 752 // not yet available. 753 if (!side_effect_free_whitelist_prefix_set_.get()) 754 return false; 755 756 return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix); 757} 758 759bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString( 760 const std::string& str) { 761 SBFullHash hash; 762 crypto::SHA256HashString(str, &hash, sizeof(hash)); 763 std::vector<SBFullHash> hashes; 764 hashes.push_back(hash); 765 return ContainsWhitelistedHashes(download_whitelist_, hashes); 766} 767 768bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes( 769 const SBWhitelist& whitelist, 770 const std::vector<SBFullHash>& hashes) { 771 base::AutoLock l(lookup_lock_); 772 if (whitelist.second) 773 return true; 774 for (std::vector<SBFullHash>::const_iterator it = hashes.begin(); 775 it != hashes.end(); ++it) { 776 if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it)) 777 return true; 778 } 779 return false; 780} 781 782// Helper to insert entries for all of the prefixes or full hashes in 783// |entry| into the store. 784void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host, 785 const SBEntry* entry, int list_id) { 786 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 787 788 SafeBrowsingStore* store = GetStore(list_id); 789 if (!store) return; 790 791 STATS_COUNTER("SB.HostInsert", 1); 792 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 793 const int count = entry->prefix_count(); 794 795 DCHECK(!entry->IsSub()); 796 if (!count) { 797 // No prefixes, use host instead. 
798 STATS_COUNTER("SB.PrefixAdd", 1); 799 store->WriteAddPrefix(encoded_chunk_id, host); 800 } else if (entry->IsPrefix()) { 801 // Prefixes only. 802 for (int i = 0; i < count; i++) { 803 const SBPrefix prefix = entry->PrefixAt(i); 804 STATS_COUNTER("SB.PrefixAdd", 1); 805 store->WriteAddPrefix(encoded_chunk_id, prefix); 806 } 807 } else { 808 // Prefixes and hashes. 809 const base::Time receive_time = base::Time::Now(); 810 for (int i = 0; i < count; ++i) { 811 const SBFullHash full_hash = entry->FullHashAt(i); 812 const SBPrefix prefix = full_hash.prefix; 813 814 STATS_COUNTER("SB.PrefixAdd", 1); 815 store->WriteAddPrefix(encoded_chunk_id, prefix); 816 817 STATS_COUNTER("SB.PrefixAddFull", 1); 818 store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); 819 } 820 } 821} 822 823// Helper to iterate over all the entries in the hosts in |chunks| and 824// add them to the store. 825void SafeBrowsingDatabaseNew::InsertAddChunks( 826 const safe_browsing_util::ListType list_id, 827 const SBChunkList& chunks) { 828 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 829 830 SafeBrowsingStore* store = GetStore(list_id); 831 if (!store) return; 832 833 for (SBChunkList::const_iterator citer = chunks.begin(); 834 citer != chunks.end(); ++citer) { 835 const int chunk_id = citer->chunk_number; 836 837 // The server can give us a chunk that we already have because 838 // it's part of a range. Don't add it again. 839 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 840 if (store->CheckAddChunk(encoded_chunk_id)) 841 continue; 842 843 store->SetAddChunk(encoded_chunk_id); 844 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); 845 hiter != citer->hosts.end(); ++hiter) { 846 // NOTE: Could pass |encoded_chunk_id|, but then inserting add 847 // chunks would look different from inserting sub chunks. 
848 InsertAdd(chunk_id, hiter->host, hiter->entry, list_id); 849 } 850 } 851} 852 853// Helper to insert entries for all of the prefixes or full hashes in 854// |entry| into the store. 855void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, 856 const SBEntry* entry, int list_id) { 857 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 858 859 SafeBrowsingStore* store = GetStore(list_id); 860 if (!store) return; 861 862 STATS_COUNTER("SB.HostDelete", 1); 863 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 864 const int count = entry->prefix_count(); 865 866 DCHECK(entry->IsSub()); 867 if (!count) { 868 // No prefixes, use host instead. 869 STATS_COUNTER("SB.PrefixSub", 1); 870 const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id); 871 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host); 872 } else if (entry->IsPrefix()) { 873 // Prefixes only. 874 for (int i = 0; i < count; i++) { 875 const SBPrefix prefix = entry->PrefixAt(i); 876 const int add_chunk_id = 877 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 878 879 STATS_COUNTER("SB.PrefixSub", 1); 880 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix); 881 } 882 } else { 883 // Prefixes and hashes. 884 for (int i = 0; i < count; ++i) { 885 const SBFullHash full_hash = entry->FullHashAt(i); 886 const int add_chunk_id = 887 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 888 889 STATS_COUNTER("SB.PrefixSub", 1); 890 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix); 891 892 STATS_COUNTER("SB.PrefixSubFull", 1); 893 store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash); 894 } 895 } 896} 897 898// Helper to iterate over all the entries in the hosts in |chunks| and 899// add them to the store. 
900void SafeBrowsingDatabaseNew::InsertSubChunks( 901 safe_browsing_util::ListType list_id, 902 const SBChunkList& chunks) { 903 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 904 905 SafeBrowsingStore* store = GetStore(list_id); 906 if (!store) return; 907 908 for (SBChunkList::const_iterator citer = chunks.begin(); 909 citer != chunks.end(); ++citer) { 910 const int chunk_id = citer->chunk_number; 911 912 // The server can give us a chunk that we already have because 913 // it's part of a range. Don't add it again. 914 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 915 if (store->CheckSubChunk(encoded_chunk_id)) 916 continue; 917 918 store->SetSubChunk(encoded_chunk_id); 919 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); 920 hiter != citer->hosts.end(); ++hiter) { 921 InsertSub(chunk_id, hiter->host, hiter->entry, list_id); 922 } 923 } 924} 925 926void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name, 927 const SBChunkList& chunks) { 928 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 929 930 if (corruption_detected_ || chunks.empty()) 931 return; 932 933 const base::TimeTicks before = base::TimeTicks::Now(); 934 935 const safe_browsing_util::ListType list_id = 936 safe_browsing_util::GetListId(list_name); 937 DVLOG(2) << list_name << ": " << list_id; 938 939 SafeBrowsingStore* store = GetStore(list_id); 940 if (!store) return; 941 942 change_detected_ = true; 943 944 store->BeginChunk(); 945 if (chunks.front().is_add) { 946 InsertAddChunks(list_id, chunks); 947 } else { 948 InsertSubChunks(list_id, chunks); 949 } 950 store->FinishChunk(); 951 952 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before); 953} 954 955void SafeBrowsingDatabaseNew::DeleteChunks( 956 const std::vector<SBChunkDelete>& chunk_deletes) { 957 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 958 959 if (corruption_detected_ || chunk_deletes.empty()) 960 return; 961 962 const std::string& 
list_name = chunk_deletes.front().list_name; 963 const safe_browsing_util::ListType list_id = 964 safe_browsing_util::GetListId(list_name); 965 966 SafeBrowsingStore* store = GetStore(list_id); 967 if (!store) return; 968 969 change_detected_ = true; 970 971 for (size_t i = 0; i < chunk_deletes.size(); ++i) { 972 std::vector<int> chunk_numbers; 973 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); 974 for (size_t j = 0; j < chunk_numbers.size(); ++j) { 975 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); 976 if (chunk_deletes[i].is_sub_del) 977 store->DeleteSubChunk(encoded_chunk_id); 978 else 979 store->DeleteAddChunk(encoded_chunk_id); 980 } 981 } 982} 983 984void SafeBrowsingDatabaseNew::CacheHashResults( 985 const std::vector<SBPrefix>& prefixes, 986 const std::vector<SBFullHashResult>& full_hits) { 987 // This is called on the I/O thread, lock against updates. 988 base::AutoLock locked(lookup_lock_); 989 990 if (full_hits.empty()) { 991 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 992 return; 993 } 994 995 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. 996 // Refactor to make them identical. 997 const base::Time now = base::Time::Now(); 998 const size_t orig_size = pending_browse_hashes_.size(); 999 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 1000 iter != full_hits.end(); ++iter) { 1001 const int list_id = safe_browsing_util::GetListId(iter->list_name); 1002 if (list_id == safe_browsing_util::MALWARE || 1003 list_id == safe_browsing_util::PHISH) { 1004 int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id); 1005 SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash); 1006 pending_browse_hashes_.push_back(add_full_hash); 1007 } 1008 } 1009 1010 // Sort new entries then merge with the previously-sorted entries. 
1011 std::vector<SBAddFullHash>::iterator 1012 orig_end = pending_browse_hashes_.begin() + orig_size; 1013 std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess); 1014 std::inplace_merge(pending_browse_hashes_.begin(), 1015 orig_end, pending_browse_hashes_.end(), 1016 SBAddFullHashPrefixLess); 1017} 1018 1019bool SafeBrowsingDatabaseNew::UpdateStarted( 1020 std::vector<SBListChunkRanges>* lists) { 1021 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1022 DCHECK(lists); 1023 1024 // If |BeginUpdate()| fails, reset the database. 1025 if (!browse_store_->BeginUpdate()) { 1026 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 1027 HandleCorruptDatabase(); 1028 return false; 1029 } 1030 1031 if (download_store_.get() && !download_store_->BeginUpdate()) { 1032 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); 1033 HandleCorruptDatabase(); 1034 return false; 1035 } 1036 1037 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { 1038 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 1039 HandleCorruptDatabase(); 1040 return false; 1041 } 1042 1043 if (download_whitelist_store_.get() && 1044 !download_whitelist_store_->BeginUpdate()) { 1045 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 1046 HandleCorruptDatabase(); 1047 return false; 1048 } 1049 1050 if (extension_blacklist_store_ && 1051 !extension_blacklist_store_->BeginUpdate()) { 1052 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN); 1053 HandleCorruptDatabase(); 1054 return false; 1055 } 1056 1057 if (side_effect_free_whitelist_store_ && 1058 !side_effect_free_whitelist_store_->BeginUpdate()) { 1059 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN); 1060 HandleCorruptDatabase(); 1061 return false; 1062 } 1063 1064 std::vector<std::string> browse_listnames; 1065 browse_listnames.push_back(safe_browsing_util::kMalwareList); 1066 browse_listnames.push_back(safe_browsing_util::kPhishingList); 1067 
UpdateChunkRanges(browse_store_.get(), browse_listnames, lists); 1068 1069 if (download_store_.get()) { 1070 // This store used to contain kBinHashList in addition to 1071 // kBinUrlList. Strip the stale data before generating the chunk 1072 // ranges to request. UpdateChunkRanges() will traverse the chunk 1073 // list, so this is very cheap if there are no kBinHashList chunks. 1074 const int listid = 1075 safe_browsing_util::GetListId(safe_browsing_util::kBinHashList); 1076 DeleteChunksFromStore(download_store_.get(), listid); 1077 1078 // The above marks the chunks for deletion, but they are not 1079 // actually deleted until the database is rewritten. The 1080 // following code removes the kBinHashList part of the request 1081 // before continuing so that UpdateChunkRanges() doesn't break. 1082 std::vector<std::string> download_listnames; 1083 download_listnames.push_back(safe_browsing_util::kBinUrlList); 1084 download_listnames.push_back(safe_browsing_util::kBinHashList); 1085 UpdateChunkRanges(download_store_.get(), download_listnames, lists); 1086 DCHECK_EQ(lists->back().name, 1087 std::string(safe_browsing_util::kBinHashList)); 1088 lists->pop_back(); 1089 1090 // TODO(shess): This problem could also be handled in 1091 // BeginUpdate() by detecting the chunks to delete and rewriting 1092 // the database before it's used. When I implemented that, it 1093 // felt brittle, it might be easier to just wait for some future 1094 // format change. 
1095 } 1096 1097 if (csd_whitelist_store_.get()) { 1098 std::vector<std::string> csd_whitelist_listnames; 1099 csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList); 1100 UpdateChunkRanges(csd_whitelist_store_.get(), 1101 csd_whitelist_listnames, lists); 1102 } 1103 1104 if (download_whitelist_store_.get()) { 1105 std::vector<std::string> download_whitelist_listnames; 1106 download_whitelist_listnames.push_back( 1107 safe_browsing_util::kDownloadWhiteList); 1108 UpdateChunkRanges(download_whitelist_store_.get(), 1109 download_whitelist_listnames, lists); 1110 } 1111 1112 if (extension_blacklist_store_) { 1113 UpdateChunkRanges( 1114 extension_blacklist_store_.get(), 1115 std::vector<std::string>(1, safe_browsing_util::kExtensionBlacklist), 1116 lists); 1117 } 1118 1119 if (side_effect_free_whitelist_store_) { 1120 UpdateChunkRanges( 1121 side_effect_free_whitelist_store_.get(), 1122 std::vector<std::string>( 1123 1, safe_browsing_util::kSideEffectFreeWhitelist), 1124 lists); 1125 } 1126 1127 corruption_detected_ = false; 1128 change_detected_ = false; 1129 return true; 1130} 1131 1132void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { 1133 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1134 1135 // The update may have failed due to corrupt storage (for instance, 1136 // an excessive number of invalid add_chunks and sub_chunks). 1137 // Double-check that the databases are valid. 1138 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk 1139 // sections would allow throwing a corruption error in 1140 // UpdateStarted(). 
1141 if (!update_succeeded) { 1142 if (!browse_store_->CheckValidity()) 1143 DLOG(ERROR) << "Safe-browsing browse database corrupt."; 1144 1145 if (download_store_.get() && !download_store_->CheckValidity()) 1146 DLOG(ERROR) << "Safe-browsing download database corrupt."; 1147 1148 if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity()) 1149 DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt."; 1150 1151 if (download_whitelist_store_.get() && 1152 !download_whitelist_store_->CheckValidity()) { 1153 DLOG(ERROR) << "Safe-browsing download whitelist database corrupt."; 1154 } 1155 1156 if (extension_blacklist_store_ && 1157 !extension_blacklist_store_->CheckValidity()) { 1158 DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt."; 1159 } 1160 1161 if (side_effect_free_whitelist_store_ && 1162 !side_effect_free_whitelist_store_->CheckValidity()) { 1163 DLOG(ERROR) << "Safe-browsing side-effect free whitelist database " 1164 << "corrupt."; 1165 } 1166 } 1167 1168 if (corruption_detected_) 1169 return; 1170 1171 // Unroll the transaction if there was a protocol error or if the 1172 // transaction was empty. This will leave the prefix set, the 1173 // pending hashes, and the prefix miss cache in place. 1174 if (!update_succeeded || !change_detected_) { 1175 // Track empty updates to answer questions at http://crbug.com/72216 . 
1176 if (update_succeeded && !change_detected_) 1177 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0); 1178 browse_store_->CancelUpdate(); 1179 if (download_store_.get()) 1180 download_store_->CancelUpdate(); 1181 if (csd_whitelist_store_.get()) 1182 csd_whitelist_store_->CancelUpdate(); 1183 if (download_whitelist_store_.get()) 1184 download_whitelist_store_->CancelUpdate(); 1185 if (extension_blacklist_store_) 1186 extension_blacklist_store_->CancelUpdate(); 1187 if (side_effect_free_whitelist_store_) 1188 side_effect_free_whitelist_store_->CancelUpdate(); 1189 return; 1190 } 1191 1192 if (download_store_) { 1193 int64 size_bytes = UpdateHashPrefixStore( 1194 download_filename_, 1195 download_store_.get(), 1196 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); 1197 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", 1198 static_cast<int>(size_bytes / 1024)); 1199 } 1200 1201 UpdateBrowseStore(); 1202 UpdateWhitelistStore(csd_whitelist_filename_, 1203 csd_whitelist_store_.get(), 1204 &csd_whitelist_); 1205 UpdateWhitelistStore(download_whitelist_filename_, 1206 download_whitelist_store_.get(), 1207 &download_whitelist_); 1208 1209 if (extension_blacklist_store_) { 1210 int64 size_bytes = UpdateHashPrefixStore( 1211 extension_blacklist_filename_, 1212 extension_blacklist_store_.get(), 1213 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH); 1214 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes", 1215 static_cast<int>(size_bytes / 1024)); 1216 } 1217 1218 if (side_effect_free_whitelist_store_) 1219 UpdateSideEffectFreeWhitelistStore(); 1220} 1221 1222void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1223 const base::FilePath& store_filename, 1224 SafeBrowsingStore* store, 1225 SBWhitelist* whitelist) { 1226 if (!store) 1227 return; 1228 1229 // For the whitelists, we don't cache and save full hashes since all 1230 // hashes are already full. 1231 std::vector<SBAddFullHash> empty_add_hashes; 1232 1233 // Not needed for the whitelists. 
1234 std::set<SBPrefix> empty_miss_cache; 1235 1236 // Note: prefixes will not be empty. The current data store implementation 1237 // stores all full-length hashes as both full and prefix hashes. 1238 SBAddPrefixes prefixes; 1239 std::vector<SBAddFullHash> full_hashes; 1240 if (!store->FinishUpdate(empty_add_hashes, empty_miss_cache, &prefixes, 1241 &full_hashes)) { 1242 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1243 WhitelistEverything(whitelist); 1244 return; 1245 } 1246 1247#if defined(OS_MACOSX) 1248 base::mac::SetFileBackupExclusion(store_filename); 1249#endif 1250 1251 LoadWhitelist(full_hashes, whitelist); 1252} 1253 1254int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1255 const base::FilePath& store_filename, 1256 SafeBrowsingStore* store, 1257 FailureType failure_type) { 1258 // We don't cache and save full hashes. 1259 std::vector<SBAddFullHash> empty_add_hashes; 1260 1261 // Backend lookup happens only if a prefix is in add list. 1262 std::set<SBPrefix> empty_miss_cache; 1263 1264 // These results are not used after this call. Simply ignore the 1265 // returned value after FinishUpdate(...). 1266 SBAddPrefixes add_prefixes_result; 1267 std::vector<SBAddFullHash> add_full_hashes_result; 1268 1269 if (!store->FinishUpdate(empty_add_hashes, 1270 empty_miss_cache, 1271 &add_prefixes_result, 1272 &add_full_hashes_result)) { 1273 RecordFailure(failure_type); 1274 } 1275 1276#if defined(OS_MACOSX) 1277 base::mac::SetFileBackupExclusion(store_filename); 1278#endif 1279 1280 return GetFileSizeOrZero(store_filename); 1281} 1282 1283void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1284 // Copy out the pending add hashes. Copy rather than swapping in 1285 // case |ContainsBrowseURL()| is called before the new filter is complete. 
1286 std::vector<SBAddFullHash> pending_add_hashes; 1287 { 1288 base::AutoLock locked(lookup_lock_); 1289 pending_add_hashes.insert(pending_add_hashes.end(), 1290 pending_browse_hashes_.begin(), 1291 pending_browse_hashes_.end()); 1292 } 1293 1294 // Measure the amount of IO during the filter build. 1295 base::IoCounters io_before, io_after; 1296 base::ProcessHandle handle = base::Process::Current().handle(); 1297 scoped_ptr<base::ProcessMetrics> metric( 1298#if !defined(OS_MACOSX) 1299 base::ProcessMetrics::CreateProcessMetrics(handle) 1300#else 1301 // Getting stats only for the current process is enough, so NULL is fine. 1302 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1303#endif 1304 ); 1305 1306 // IoCounters are currently not supported on Mac, and may not be 1307 // available for Linux, so we check the result and only show IO 1308 // stats if they are available. 1309 const bool got_counters = metric->GetIOCounters(&io_before); 1310 1311 const base::TimeTicks before = base::TimeTicks::Now(); 1312 1313 SBAddPrefixes add_prefixes; 1314 std::vector<SBAddFullHash> add_full_hashes; 1315 if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_, 1316 &add_prefixes, &add_full_hashes)) { 1317 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1318 return; 1319 } 1320 1321 // TODO(shess): If |add_prefixes| were sorted by the prefix, it 1322 // could be passed directly to |PrefixSet()|, removing the need for 1323 // |prefixes|. For now, |prefixes| is useful while debugging 1324 // things. 
1325 std::vector<SBPrefix> prefixes; 1326 prefixes.reserve(add_prefixes.size()); 1327 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 1328 iter != add_prefixes.end(); ++iter) { 1329 prefixes.push_back(iter->prefix); 1330 } 1331 1332 std::sort(prefixes.begin(), prefixes.end()); 1333 scoped_ptr<safe_browsing::PrefixSet> 1334 prefix_set(new safe_browsing::PrefixSet(prefixes)); 1335 1336 // This needs to be in sorted order by prefix for efficient access. 1337 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 1338 SBAddFullHashPrefixLess); 1339 1340 // Swap in the newly built filter and cache. 1341 { 1342 base::AutoLock locked(lookup_lock_); 1343 full_browse_hashes_.swap(add_full_hashes); 1344 1345 // TODO(shess): If |CacheHashResults()| is posted between the 1346 // earlier lock and this clear, those pending hashes will be lost. 1347 // It could be fixed by only removing hashes which were collected 1348 // at the earlier point. I believe that is fail-safe as-is (the 1349 // hash will be fetched again). 1350 pending_browse_hashes_.clear(); 1351 prefix_miss_cache_.clear(); 1352 browse_prefix_set_.swap(prefix_set); 1353 } 1354 1355 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " 1356 << (base::TimeTicks::Now() - before).InMilliseconds() 1357 << " ms total. prefix count: " << add_prefixes.size(); 1358 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1359 1360 // Persist the prefix set to disk. Since only this thread changes 1361 // |browse_prefix_set_|, there is no need to lock. 1362 WritePrefixSet(); 1363 1364 // Gather statistics. 
1365 if (got_counters && metric->GetIOCounters(&io_after)) { 1366 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", 1367 static_cast<int>(io_after.ReadTransferCount - 1368 io_before.ReadTransferCount) / 1024); 1369 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", 1370 static_cast<int>(io_after.WriteTransferCount - 1371 io_before.WriteTransferCount) / 1024); 1372 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", 1373 static_cast<int>(io_after.ReadOperationCount - 1374 io_before.ReadOperationCount)); 1375 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", 1376 static_cast<int>(io_after.WriteOperationCount - 1377 io_before.WriteOperationCount)); 1378 } 1379 1380 int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_); 1381 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes", 1382 static_cast<int>(file_size / 1024)); 1383 file_size = GetFileSizeOrZero(browse_filename_); 1384 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1385 static_cast<int>(file_size / 1024)); 1386 1387#if defined(OS_MACOSX) 1388 base::mac::SetFileBackupExclusion(browse_filename_); 1389#endif 1390} 1391 1392void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1393 std::vector<SBAddFullHash> empty_add_hashes; 1394 std::set<SBPrefix> empty_miss_cache; 1395 SBAddPrefixes add_prefixes; 1396 std::vector<SBAddFullHash> add_full_hashes_result; 1397 1398 if (!side_effect_free_whitelist_store_->FinishUpdate( 1399 empty_add_hashes, 1400 empty_miss_cache, 1401 &add_prefixes, 1402 &add_full_hashes_result)) { 1403 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1404 return; 1405 } 1406 1407 // TODO(shess): If |add_prefixes| were sorted by the prefix, it 1408 // could be passed directly to |PrefixSet()|, removing the need for 1409 // |prefixes|. For now, |prefixes| is useful while debugging 1410 // things. 
1411 std::vector<SBPrefix> prefixes; 1412 prefixes.reserve(add_prefixes.size()); 1413 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 1414 iter != add_prefixes.end(); ++iter) { 1415 prefixes.push_back(iter->prefix); 1416 } 1417 1418 std::sort(prefixes.begin(), prefixes.end()); 1419 scoped_ptr<safe_browsing::PrefixSet> 1420 prefix_set(new safe_browsing::PrefixSet(prefixes)); 1421 1422 // Swap in the newly built prefix set. 1423 { 1424 base::AutoLock locked(lookup_lock_); 1425 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1426 } 1427 1428 const base::TimeTicks before = base::TimeTicks::Now(); 1429 const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile( 1430 side_effect_free_whitelist_prefix_set_filename_); 1431 DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix " 1432 << "set in " << (base::TimeTicks::Now() - before).InMilliseconds() 1433 << " ms"; 1434 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite", 1435 base::TimeTicks::Now() - before); 1436 1437 if (!write_ok) 1438 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE); 1439 1440 // Gather statistics. 1441 int64 file_size = GetFileSizeOrZero( 1442 side_effect_free_whitelist_prefix_set_filename_); 1443 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes", 1444 static_cast<int>(file_size / 1024)); 1445 file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_); 1446 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes", 1447 static_cast<int>(file_size / 1024)); 1448 1449#if defined(OS_MACOSX) 1450 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); 1451 base::mac::SetFileBackupExclusion( 1452 side_effect_free_whitelist_prefix_set_filename_); 1453#endif 1454} 1455 1456void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { 1457 // Reset the database after the current task has unwound (but only 1458 // reset once within the scope of a given task). 
1459 if (!reset_factory_.HasWeakPtrs()) { 1460 RecordFailure(FAILURE_DATABASE_CORRUPT); 1461 base::MessageLoop::current()->PostTask(FROM_HERE, 1462 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase, 1463 reset_factory_.GetWeakPtr())); 1464 } 1465} 1466 1467void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { 1468 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); 1469 corruption_detected_ = true; // Stop updating the database. 1470 ResetDatabase(); 1471 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset"; 1472} 1473 1474// TODO(shess): I'm not clear why this code doesn't have any 1475// real error-handling. 1476void SafeBrowsingDatabaseNew::LoadPrefixSet() { 1477 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1478 DCHECK(!browse_prefix_set_filename_.empty()); 1479 1480 // If there is no database, the filter cannot be used. 1481 base::PlatformFileInfo db_info; 1482 if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0) 1483 return; 1484 1485 // Cleanup any stale bloom filter (no longer used). 1486 // TODO(shess): Track failure to delete? 
1487 base::FilePath bloom_filter_filename = 1488 BloomFilterForFilename(browse_filename_); 1489 base::DeleteFile(bloom_filter_filename, false); 1490 1491 const base::TimeTicks before = base::TimeTicks::Now(); 1492 browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile( 1493 browse_prefix_set_filename_)); 1494 DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in " 1495 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1496 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); 1497 1498 if (!browse_prefix_set_.get()) 1499 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ); 1500} 1501 1502bool SafeBrowsingDatabaseNew::Delete() { 1503 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1504 1505 const bool r1 = browse_store_->Delete(); 1506 if (!r1) 1507 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1508 1509 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1510 if (!r2) 1511 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1512 1513 const bool r3 = csd_whitelist_store_.get() ? 1514 csd_whitelist_store_->Delete() : true; 1515 if (!r3) 1516 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1517 1518 const bool r4 = download_whitelist_store_.get() ? 
1519 download_whitelist_store_->Delete() : true; 1520 if (!r4) 1521 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1522 1523 base::FilePath bloom_filter_filename = 1524 BloomFilterForFilename(browse_filename_); 1525 const bool r5 = base::DeleteFile(bloom_filter_filename, false); 1526 if (!r5) 1527 RecordFailure(FAILURE_DATABASE_FILTER_DELETE); 1528 1529 const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false); 1530 if (!r6) 1531 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE); 1532 1533 const bool r7 = base::DeleteFile(extension_blacklist_filename_, false); 1534 if (!r7) 1535 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE); 1536 1537 const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_, 1538 false); 1539 if (!r8) 1540 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE); 1541 1542 const bool r9 = base::DeleteFile( 1543 side_effect_free_whitelist_prefix_set_filename_, 1544 false); 1545 if (!r9) 1546 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE); 1547 1548 return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9; 1549} 1550 1551void SafeBrowsingDatabaseNew::WritePrefixSet() { 1552 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1553 1554 if (!browse_prefix_set_.get()) 1555 return; 1556 1557 const base::TimeTicks before = base::TimeTicks::Now(); 1558 const bool write_ok = browse_prefix_set_->WriteFile( 1559 browse_prefix_set_filename_); 1560 DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in " 1561 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1562 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before); 1563 1564 if (!write_ok) 1565 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE); 1566 1567#if defined(OS_MACOSX) 1568 base::mac::SetFileBackupExclusion(browse_prefix_set_filename_); 1569#endif 1570} 1571 1572void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) { 1573 base::AutoLock locked(lookup_lock_); 1574 whitelist->second = true; 1575 
whitelist->first.clear(); 1576} 1577 1578void SafeBrowsingDatabaseNew::LoadWhitelist( 1579 const std::vector<SBAddFullHash>& full_hashes, 1580 SBWhitelist* whitelist) { 1581 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1582 if (full_hashes.size() > kMaxWhitelistSize) { 1583 WhitelistEverything(whitelist); 1584 return; 1585 } 1586 1587 std::vector<SBFullHash> new_whitelist; 1588 new_whitelist.reserve(full_hashes.size()); 1589 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1590 it != full_hashes.end(); ++it) { 1591 new_whitelist.push_back(it->full_hash); 1592 } 1593 std::sort(new_whitelist.begin(), new_whitelist.end()); 1594 1595 SBFullHash kill_switch; 1596 crypto::SHA256HashString(kWhitelistKillSwitchUrl, &kill_switch, 1597 sizeof(kill_switch)); 1598 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), 1599 kill_switch)) { 1600 // The kill switch is whitelisted hence we whitelist all URLs. 1601 WhitelistEverything(whitelist); 1602 } else { 1603 base::AutoLock locked(lookup_lock_); 1604 whitelist->second = false; 1605 whitelist->first.swap(new_whitelist); 1606 } 1607} 1608