safe_browsing_store.h revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 6#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 7 8#include <set> 9#include <vector> 10 11#include "base/basictypes.h" 12#include "base/callback_forward.h" 13#include "base/hash_tables.h" 14#include "base/time.h" 15#include "chrome/browser/safe_browsing/safe_browsing_util.h" 16 17class FilePath; 18 19// SafeBrowsingStore provides a storage abstraction for the 20// safe-browsing data used to build the bloom filter. The items 21// stored are: 22// The set of add and sub chunks seen. 23// List of SBAddPrefix (chunk_id and SBPrefix). 24// List of SBSubPrefix (chunk_id and the target SBAddPrefix). 25// List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash). 26// List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash). 27// 28// The store is geared towards updating the data, not runtime access 29// to the data (that is handled by SafeBrowsingDatabase). Updates are 30// handled similar to a SQL transaction cycle, with the new data being 31// returned from FinishUpdate() (the COMMIT). Data is not persistent 32// until FinishUpdate() returns successfully. 33// 34// FinishUpdate() also handles dropping items who's chunk has been 35// deleted, and netting out the add/sub lists (when a sub matches an 36// add, both are dropped). 37 38// GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so 39// that these items can be generically compared with each other by 40// SBAddPrefixLess() and SBAddPrefixHashLess(). 41 42struct SBAddPrefix { 43 int32 chunk_id; 44 SBPrefix prefix; 45 46 SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {} 47 SBAddPrefix() : chunk_id(), prefix() {} 48 49 int32 GetAddChunkId() const { return chunk_id; } 50 SBPrefix GetAddPrefix() const { return prefix; } 51}; 52 53typedef std::deque<SBAddPrefix> SBAddPrefixes; 54 55struct SBSubPrefix { 56 int32 chunk_id; 57 int32 add_chunk_id; 58 SBPrefix add_prefix; 59 60 SBSubPrefix(int32 id, int32 add_id, int prefix) 61 : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {} 62 SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {} 63 64 int32 GetAddChunkId() const { return add_chunk_id; } 65 SBPrefix GetAddPrefix() const { return add_prefix; } 66}; 67 68struct SBAddFullHash { 69 int32 chunk_id; 70 int32 received; 71 SBFullHash full_hash; 72 73 SBAddFullHash(int32 id, base::Time r, const SBFullHash& h) 74 : chunk_id(id), 75 received(static_cast<int32>(r.ToTimeT())), 76 full_hash(h) { 77 } 78 79 // Provided for ReadAddHashes() implementations, which already have 80 // an int32 for the time. 81 SBAddFullHash(int32 id, int32 r, const SBFullHash& h) 82 : chunk_id(id), received(r), full_hash(h) {} 83 84 SBAddFullHash() : chunk_id(), received(), full_hash() {} 85 86 int32 GetAddChunkId() const { return chunk_id; } 87 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 88}; 89 90struct SBSubFullHash { 91 int32 chunk_id; 92 int32 add_chunk_id; 93 SBFullHash full_hash; 94 95 SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h) 96 : chunk_id(id), add_chunk_id(add_id), full_hash(h) {} 97 SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {} 98 99 int32 GetAddChunkId() const { return add_chunk_id; } 100 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 101}; 102 103// Determine less-than based on add chunk and prefix. 104template <class T, class U> 105bool SBAddPrefixLess(const T& a, const U& b) { 106 if (a.GetAddChunkId() != b.GetAddChunkId()) 107 return a.GetAddChunkId() < b.GetAddChunkId(); 108 109 return a.GetAddPrefix() < b.GetAddPrefix(); 110} 111 112// Determine less-than based on add chunk, prefix, and full hash. 113// Prefix can compare differently than hash due to byte ordering, 114// so it must take precedence. 115template <class T, class U> 116bool SBAddPrefixHashLess(const T& a, const U& b) { 117 if (SBAddPrefixLess(a, b)) 118 return true; 119 120 if (SBAddPrefixLess(b, a)) 121 return false; 122 123 return memcmp(a.full_hash.full_hash, b.full_hash.full_hash, 124 sizeof(a.full_hash.full_hash)) < 0; 125} 126 127// Process the lists for subs which knock out adds. For any item in 128// |sub_prefixes| which has a match in |add_prefixes|, knock out the 129// matched items from all vectors. Additionally remove items from 130// deleted chunks. 131// 132// TODO(shess): Since the prefixes are uniformly-distributed hashes, 133// there aren't many ways to organize the inputs for efficient 134// processing. For this reason, the vectors are sorted and processed 135// in parallel. At this time this code does the sorting internally, 136// but it might make sense to make sorting an API requirement so that 137// the storage can optimize for it. 138void SBProcessSubs(SBAddPrefixes* add_prefixes, 139 std::vector<SBSubPrefix>* sub_prefixes, 140 std::vector<SBAddFullHash>* add_full_hashes, 141 std::vector<SBSubFullHash>* sub_full_hashes, 142 const base::hash_set<int32>& add_chunks_deleted, 143 const base::hash_set<int32>& sub_chunks_deleted); 144 145// Records a histogram of the number of items in |prefix_misses| which 146// are not in |add_prefixes|. 147void SBCheckPrefixMisses(const SBAddPrefixes& add_prefixes, 148 const std::set<SBPrefix>& prefix_misses); 149 150// TODO(shess): This uses int32 rather than int because it's writing 151// specifically-sized items to files. SBPrefix should likewise be 152// explicitly sized. 153 154// Abstract interface for storing data. 155class SafeBrowsingStore { 156 public: 157 SafeBrowsingStore() {} 158 virtual ~SafeBrowsingStore() {} 159 160 // Sets up the information for later use, but does not necessarily 161 // check whether the underlying file exists, or is valid. If 162 // |curruption_callback| is non-NULL it will be called if corruption 163 // is detected, which could happen as part of any call other than 164 // Delete(). The appropriate action is to use Delete() to clear the 165 // store. 166 virtual void Init(const FilePath& filename, 167 const base::Closure& corruption_callback) = 0; 168 169 // Deletes the files which back the store, returning true if 170 // successful. 171 virtual bool Delete() = 0; 172 173 // Get all Add prefixes out from the store. 174 virtual bool GetAddPrefixes(SBAddPrefixes* add_prefixes) = 0; 175 176 // Get all add full-length hashes. 177 virtual bool GetAddFullHashes( 178 std::vector<SBAddFullHash>* add_full_hashes) = 0; 179 180 // Start an update. None of the following methods should be called 181 // unless this returns true. If this returns true, the update 182 // should be terminated by FinishUpdate() or CancelUpdate(). 183 virtual bool BeginUpdate() = 0; 184 185 // Start a chunk of data. None of the methods through FinishChunk() 186 // should be called unless this returns true. 187 // TODO(shess): Would it make sense for this to accept |chunk_id|? 188 // Possibly not, because of possible confusion between sub_chunk_id 189 // and add_chunk_id. 190 virtual bool BeginChunk() = 0; 191 192 virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0; 193 virtual bool WriteAddHash(int32 chunk_id, 194 base::Time receive_time, 195 const SBFullHash& full_hash) = 0; 196 virtual bool WriteSubPrefix(int32 chunk_id, 197 int32 add_chunk_id, SBPrefix prefix) = 0; 198 virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, 199 const SBFullHash& full_hash) = 0; 200 201 // Collect the chunk data and preferrably store it on disk to 202 // release memory. Shoul not modify the data in-place. 203 virtual bool FinishChunk() = 0; 204 205 // Track the chunks which have been seen. 206 virtual void SetAddChunk(int32 chunk_id) = 0; 207 virtual bool CheckAddChunk(int32 chunk_id) = 0; 208 virtual void GetAddChunks(std::vector<int32>* out) = 0; 209 virtual void SetSubChunk(int32 chunk_id) = 0; 210 virtual bool CheckSubChunk(int32 chunk_id) = 0; 211 virtual void GetSubChunks(std::vector<int32>* out) = 0; 212 213 // Delete the indicated chunk_id. The chunk will continue to be 214 // visible until the end of the transaction. 215 virtual void DeleteAddChunk(int32 chunk_id) = 0; 216 virtual void DeleteSubChunk(int32 chunk_id) = 0; 217 218 // May be called during update to verify that the storage is valid. 219 // Return true if the store seems valid. If corruption is detected, 220 // calls the corruption callback and return false. 221 // NOTE(shess): When storage was SQLite, there was no guarantee that 222 // a structurally sound database actually contained valid data, 223 // whereas SafeBrowsingStoreFile checksums the data. For now, this 224 // distinction doesn't matter. 225 virtual bool CheckValidity() = 0; 226 227 // Pass the collected chunks through SBPRocessSubs() and commit to 228 // permanent storage. The resulting add prefixes and hashes will be 229 // stored in |add_prefixes_result| and |add_full_hashes_result|. 230 // |pending_adds| is the set of full hashes which have been received 231 // since the previous update, and is provided as a convenience 232 // (could be written via WriteAddHash(), but that would flush the 233 // chunk to disk). |prefix_misses| is the set of prefixes where the 234 // |GetHash()| request returned no full hashes, used for diagnostic 235 // purposes. 236 virtual bool FinishUpdate( 237 const std::vector<SBAddFullHash>& pending_adds, 238 const std::set<SBPrefix>& prefix_misses, 239 SBAddPrefixes* add_prefixes_result, 240 std::vector<SBAddFullHash>* add_full_hashes_result) = 0; 241 242 // Cancel the update in process and remove any temporary disk 243 // storage, leaving the original data unmodified. 244 virtual bool CancelUpdate() = 0; 245 246 private: 247 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore); 248}; 249 250#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 251