1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/bloom_filter.h"
6
7#include <limits.h>
8
9#include <set>
10#include <vector>
11
12#include "base/file_util.h"
13#include "base/logging.h"
14#include "base/memory/ref_counted.h"
15#include "base/path_service.h"
16#include "base/rand_util.h"
17#include "base/string_util.h"
18#include "base/memory/scoped_temp_dir.h"
19#include "testing/gtest/include/gtest/gtest.h"
20
21namespace {
22
23SBPrefix GenHash() {
24  return static_cast<SBPrefix>(base::RandUint64());
25}
26
27}
28
29TEST(SafeBrowsingBloomFilter, BloomFilterUse) {
30  // Use a small number for unit test so it's not slow.
31  int count = 1000;
32
33  // Build up the bloom filter.
34  scoped_refptr<BloomFilter> filter(
35      new BloomFilter(count * BloomFilter::kBloomFilterSizeRatio));
36
37  typedef std::set<SBPrefix> Values;
38  Values values;
39  for (int i = 0; i < count; ++i) {
40    SBPrefix value = GenHash();
41    values.insert(value);
42    filter->Insert(value);
43  }
44
45  // Check serialization works.
46  char* data_copy = new char[filter->size()];
47  memcpy(data_copy, filter->data(), filter->size());
48  scoped_refptr<BloomFilter> filter_copy(
49      new BloomFilter(data_copy, filter->size(), filter->hash_keys_));
50
51  // Check no false negatives by ensuring that every time we inserted exists.
52  for (Values::const_iterator i = values.begin(); i != values.end(); ++i)
53    EXPECT_TRUE(filter_copy->Exists(*i));
54
55  // Check false positive error rate by checking the same number of items that
56  // we inserted, but of different values, and calculating what percentage are
57  // "found".
58  int found_count = 0;
59  int checked = 0;
60  while (true) {
61    SBPrefix value = GenHash();
62    if (values.count(value))
63      continue;
64
65    if (filter_copy->Exists(value))
66      found_count++;
67
68    checked++;
69    if (checked == count)
70      break;
71  }
72
73  // The FP rate should be 1.2%.  Keep a large margin of error because we don't
74  // want to fail this test because we happened to randomly pick a lot of FPs.
75  double fp_rate = found_count * 100.0 / count;
76  CHECK(fp_rate < 5.0);
77
78  VLOG(1) << "For safe browsing bloom filter of size " << count
79          << ", the FP rate was " << fp_rate << " %";
80}
81
82// Test that we can read and write the bloom filter file.
83TEST(SafeBrowsingBloomFilter, BloomFilterFile) {
84  // Create initial filter.
85  const int kTestEntries = BloomFilter::kBloomFilterMinSize;
86  scoped_refptr<BloomFilter> filter_write(
87      new BloomFilter(kTestEntries * BloomFilter::kBloomFilterSizeRatio));
88
89  for (int i = 0; i < kTestEntries; ++i)
90    filter_write->Insert(GenHash());
91
92  // Remove any left over test filters and serialize.
93  ScopedTempDir temp_dir;
94  ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
95  FilePath filter_path = temp_dir.path().AppendASCII("SafeBrowsingTestFilter");
96  ASSERT_TRUE(filter_write->WriteFile(filter_path));
97
98  // Create new empty filter and load from disk.
99  BloomFilter* filter = BloomFilter::LoadFile(filter_path);
100  ASSERT_TRUE(filter != NULL);
101  scoped_refptr<BloomFilter> filter_read(filter);
102
103  // Check data consistency.
104  EXPECT_EQ(filter_write->hash_keys_.size(), filter_read->hash_keys_.size());
105
106  for (size_t i = 0; i < filter_write->hash_keys_.size(); ++i)
107    EXPECT_EQ(filter_write->hash_keys_[i], filter_read->hash_keys_[i]);
108
109  EXPECT_EQ(filter_write->size(), filter_read->size());
110
111  EXPECT_EQ(0,
112      memcmp(filter_write->data(), filter_read->data(), filter_read->size()));
113}
114