1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be
3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/safe_browsing/bloom_filter.h"
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <limits.h>
8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <set>
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <vector>
11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/file_util.h"
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/logging.h"
14ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/ref_counted.h"
15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/path_service.h"
16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/rand_util.h"
17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h"
18ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/scoped_temp_dir.h"
19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "testing/gtest/include/gtest/gtest.h"
20c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace {
22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
23c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochSBPrefix GenHash() {
24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return static_cast<SBPrefix>(base::RandUint64());
25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
26c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
29c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(SafeBrowsingBloomFilter, BloomFilterUse) {
30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Use a small number for unit test so it's not slow.
31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int count = 1000;
32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Build up the bloom filter.
34513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  scoped_refptr<BloomFilter> filter(
35513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch      new BloomFilter(count * BloomFilter::kBloomFilterSizeRatio));
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  typedef std::set<SBPrefix> Values;
38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Values values;
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (int i = 0; i < count; ++i) {
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    SBPrefix value = GenHash();
41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    values.insert(value);
42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    filter->Insert(value);
43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Check serialization works.
46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  char* data_copy = new char[filter->size()];
47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  memcpy(data_copy, filter->data(), filter->size());
48513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  scoped_refptr<BloomFilter> filter_copy(
49513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch      new BloomFilter(data_copy, filter->size(), filter->hash_keys_));
50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Check no false negatives by ensuring that every time we inserted exists.
52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (Values::const_iterator i = values.begin(); i != values.end(); ++i)
53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_TRUE(filter_copy->Exists(*i));
54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Check false positive error rate by checking the same number of items that
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // we inserted, but of different values, and calculating what percentage are
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // "found".
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int found_count = 0;
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int checked = 0;
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  while (true) {
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    SBPrefix value = GenHash();
62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (values.count(value))
63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      continue;
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (filter_copy->Exists(value))
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      found_count++;
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    checked++;
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (checked == count)
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      break;
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // The FP rate should be 1.2%.  Keep a large margin of error because we don't
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // want to fail this test because we happened to randomly pick a lot of FPs.
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  double fp_rate = found_count * 100.0 / count;
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  CHECK(fp_rate < 5.0);
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
78513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  VLOG(1) << "For safe browsing bloom filter of size " << count
79513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch          << ", the FP rate was " << fp_rate << " %";
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Test that we can read and write the bloom filter file.
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST(SafeBrowsingBloomFilter, BloomFilterFile) {
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Create initial filter.
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const int kTestEntries = BloomFilter::kBloomFilterMinSize;
86513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  scoped_refptr<BloomFilter> filter_write(
87513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch      new BloomFilter(kTestEntries * BloomFilter::kBloomFilterSizeRatio));
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (int i = 0; i < kTestEntries; ++i)
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    filter_write->Insert(GenHash());
91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Remove any left over test filters and serialize.
93ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  ScopedTempDir temp_dir;
94ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
95ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  FilePath filter_path = temp_dir.path().AppendASCII("SafeBrowsingTestFilter");
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ASSERT_TRUE(filter_write->WriteFile(filter_path));
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Create new empty filter and load from disk.
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  BloomFilter* filter = BloomFilter::LoadFile(filter_path);
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ASSERT_TRUE(filter != NULL);
101513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  scoped_refptr<BloomFilter> filter_read(filter);
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Check data consistency.
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(filter_write->hash_keys_.size(), filter_read->hash_keys_.size());
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < filter_write->hash_keys_.size(); ++i)
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(filter_write->hash_keys_[i], filter_read->hash_keys_[i]);
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(filter_write->size(), filter_read->size());
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(0,
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      memcmp(filter_write->data(), filter_read->data(), filter_read->size()));
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
114