1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// A simple bloom filter. It uses a large number (20) of hashes to reduce the 6// possibility of false positives. The bloom filter's hashing uses random keys 7// in order to minimize the chance that a false positive for one user is a false 8// positive for all. 9// 10// The bloom filter manages it serialization to disk with the following file 11// format: 12// 4 byte version number 13// 4 byte number of hash keys (n) 14// n * 8 bytes of hash keys 15// Remaining bytes are the filter data. 16 17#ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ 18#define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ 19#pragma once 20 21#include <vector> 22 23#include "base/gtest_prod_util.h" 24#include "base/memory/ref_counted.h" 25#include "base/memory/scoped_ptr.h" 26#include "chrome/browser/safe_browsing/safe_browsing_util.h" 27 28class FilePath; 29 30class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> { 31 public: 32 typedef uint64 HashKey; 33 typedef std::vector<HashKey> HashKeys; 34 35 // Constructs an empty filter with the given size. 36 explicit BloomFilter(int bit_size); 37 38 // Constructs a filter from serialized data. This object owns the memory and 39 // will delete it on destruction. 40 BloomFilter(char* data, int size, const HashKeys& keys); 41 42 void Insert(SBPrefix hash); 43 bool Exists(SBPrefix hash) const; 44 45 const char* data() const { return data_.get(); } 46 int size() const { return byte_size_; } 47 48 // Loading and storing the filter from / to disk. 49 static BloomFilter* LoadFile(const FilePath& filter_name); 50 bool WriteFile(const FilePath& filter_name) const; 51 52 // How many bits to use per item. See the design doc for more information. 53 static const int kBloomFilterSizeRatio = 25; 54 55 // Force a minimum size on the bloom filter to prevent a high false positive 56 // hash request rate (in bytes). 57 static const int kBloomFilterMinSize = 250000; 58 59 // Force a maximum size on the bloom filter to avoid using too much memory 60 // (in bytes). 61 static const int kBloomFilterMaxSize = 3 * 1024 * 1024; 62 63 // Use the above constants to calculate an appropriate size to pass 64 // to the BloomFilter constructor based on the intended |key_count|. 65 // TODO(shess): This is very clunky. It would be cleaner to have 66 // the constructor manage this, but at this time the unit and perf 67 // tests wish to make their own calculations. 68 static int FilterSizeForKeyCount(int key_count); 69 70 private: 71 friend class base::RefCountedThreadSafe<BloomFilter>; 72 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterUse); 73 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterFile); 74 75 static const int kNumHashKeys = 20; 76 static const int kFileVersion = 1; 77 78 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE 79 // ORDERING OF THESE VALUES. 80 enum FailureType { 81 FAILURE_FILTER_READ_OPEN, 82 FAILURE_FILTER_READ_VERSION, 83 FAILURE_FILTER_READ_NUM_KEYS, 84 FAILURE_FILTER_READ_KEY, 85 FAILURE_FILTER_READ_DATA_MINSIZE, 86 FAILURE_FILTER_READ_DATA_MAXSIZE, 87 FAILURE_FILTER_READ_DATA_SHORT, 88 FAILURE_FILTER_READ_DATA, 89 90 // Memory space for histograms is determined by the max. ALWAYS 91 // ADD NEW VALUES BEFORE THIS ONE. 92 FAILURE_FILTER_MAX 93 }; 94 95 static void RecordFailure(FailureType failure_type); 96 97 ~BloomFilter(); 98 99 int byte_size_; // size in bytes 100 int bit_size_; // size in bits 101 scoped_array<char> data_; 102 103 // Random keys used for hashing. 104 HashKeys hash_keys_; 105 106 DISALLOW_COPY_AND_ASSIGN(BloomFilter); 107}; 108 109#endif // CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ 110