1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/safe_browsing/bloom_filter.h" 6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <limits.h> 8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <set> 10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <vector> 11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/file_util.h" 13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/logging.h" 14ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/ref_counted.h" 15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/path_service.h" 16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/rand_util.h" 17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h" 18ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/scoped_temp_dir.h" 19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "testing/gtest/include/gtest/gtest.h" 20c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace { 22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 23c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochSBPrefix GenHash() { 24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return static_cast<SBPrefix>(base::RandUint64()); 25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 26c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 29c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(SafeBrowsingBloomFilter, BloomFilterUse) { 30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Use a small number for unit test so it's not slow. 31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int count = 1000; 32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Build up the bloom filter. 34513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch scoped_refptr<BloomFilter> filter( 35513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch new BloomFilter(count * BloomFilter::kBloomFilterSizeRatio)); 36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch typedef std::set<SBPrefix> Values; 38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Values values; 39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (int i = 0; i < count; ++i) { 40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch SBPrefix value = GenHash(); 41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch values.insert(value); 42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch filter->Insert(value); 43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Check serialization works. 46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char* data_copy = new char[filter->size()]; 47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch memcpy(data_copy, filter->data(), filter->size()); 48513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch scoped_refptr<BloomFilter> filter_copy( 49513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch new BloomFilter(data_copy, filter->size(), filter->hash_keys_)); 50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Check no false negatives by ensuring that every time we inserted exists. 52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (Values::const_iterator i = values.begin(); i != values.end(); ++i) 53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_TRUE(filter_copy->Exists(*i)); 54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Check false positive error rate by checking the same number of items that 56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // we inserted, but of different values, and calculating what percentage are 57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // "found". 58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int found_count = 0; 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int checked = 0; 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (true) { 61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch SBPrefix value = GenHash(); 62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (values.count(value)) 63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch continue; 64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (filter_copy->Exists(value)) 66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch found_count++; 67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch checked++; 69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (checked == count) 70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // The FP rate should be 1.2%. Keep a large margin of error because we don't 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // want to fail this test because we happened to randomly pick a lot of FPs. 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch double fp_rate = found_count * 100.0 / count; 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch CHECK(fp_rate < 5.0); 77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 78513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch VLOG(1) << "For safe browsing bloom filter of size " << count 79513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch << ", the FP rate was " << fp_rate << " %"; 80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Test that we can read and write the bloom filter file. 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(SafeBrowsingBloomFilter, BloomFilterFile) { 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Create initial filter. 85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const int kTestEntries = BloomFilter::kBloomFilterMinSize; 86513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch scoped_refptr<BloomFilter> filter_write( 87513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch new BloomFilter(kTestEntries * BloomFilter::kBloomFilterSizeRatio)); 88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (int i = 0; i < kTestEntries; ++i) 90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch filter_write->Insert(GenHash()); 91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Remove any left over test filters and serialize. 93ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ScopedTempDir temp_dir; 94ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); 95ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen FilePath filter_path = temp_dir.path().AppendASCII("SafeBrowsingTestFilter"); 96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ASSERT_TRUE(filter_write->WriteFile(filter_path)); 97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Create new empty filter and load from disk. 99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch BloomFilter* filter = BloomFilter::LoadFile(filter_path); 100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ASSERT_TRUE(filter != NULL); 101513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch scoped_refptr<BloomFilter> filter_read(filter); 102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Check data consistency. 104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(filter_write->hash_keys_.size(), filter_read->hash_keys_.size()); 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < filter_write->hash_keys_.size(); ++i) 107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(filter_write->hash_keys_[i], filter_read->hash_keys_[i]); 108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(filter_write->size(), filter_read->size()); 110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(0, 112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch memcmp(filter_write->data(), filter_read->data(), filter_read->size())); 113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 114