safe_browsing_store.h revision 3345a6884c488ff3a535c2c9acdd33d74b37e311
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 6#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 7#pragma once 8 9#include <vector> 10 11#include "base/basictypes.h" 12#include "base/callback.h" 13#include "base/hash_tables.h" 14#include "base/task.h" 15#include "base/time.h" 16#include "chrome/browser/safe_browsing/safe_browsing_util.h" 17 18class FilePath; 19 20// SafeBrowsingStore provides a storage abstraction for the 21// safe-browsing data used to build the bloom filter. The items 22// stored are: 23// The set of add and sub chunks seen. 24// List of SBAddPrefix (chunk_id and SBPrefix). 25// List of SBSubPrefix (chunk_id and the target SBAddPrefix). 26// List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash). 27// List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash). 28// 29// The store is geared towards updating the data, not runtime access 30// to the data (that is handled by SafeBrowsingDatabase). Updates are 31// handled similar to a SQL transaction cycle, with the new data being 32// returned from FinishUpdate() (the COMMIT). Data is not persistent 33// until FinishUpdate() returns successfully. 34// 35// FinishUpdate() also handles dropping items who's chunk has been 36// deleted, and netting out the add/sub lists (when a sub matches an 37// add, both are dropped). 38 39// GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so 40// that these items can be generically compared with each other by 41// SBAddPrefixLess() and SBAddPrefixHashLess(). 42 43struct SBAddPrefix { 44 int32 chunk_id; 45 SBPrefix prefix; 46 47 SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {} 48 SBAddPrefix() : chunk_id(), prefix() {} 49 50 int32 GetAddChunkId() const { return chunk_id; } 51 SBPrefix GetAddPrefix() const { return prefix; } 52}; 53 54struct SBSubPrefix { 55 int32 chunk_id; 56 int32 add_chunk_id; 57 SBPrefix add_prefix; 58 59 SBSubPrefix(int32 id, int32 add_id, int prefix) 60 : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {} 61 SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {} 62 63 int32 GetAddChunkId() const { return add_chunk_id; } 64 SBPrefix GetAddPrefix() const { return add_prefix; } 65}; 66 67struct SBAddFullHash { 68 int32 chunk_id; 69 int32 received; 70 SBFullHash full_hash; 71 72 SBAddFullHash(int32 id, base::Time r, SBFullHash h) 73 : chunk_id(id), 74 received(static_cast<int32>(r.ToTimeT())), 75 full_hash(h) { 76 } 77 78 // Provided for ReadAddHashes() implementations, which already have 79 // an int32 for the time. 80 SBAddFullHash(int32 id, int32 r, SBFullHash h) 81 : chunk_id(id), received(r), full_hash(h) {} 82 83 SBAddFullHash() : chunk_id(), received(), full_hash() {} 84 85 int32 GetAddChunkId() const { return chunk_id; } 86 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 87}; 88 89struct SBSubFullHash { 90 int32 chunk_id; 91 int32 add_chunk_id; 92 SBFullHash full_hash; 93 94 SBSubFullHash(int32 id, int32 add_id, SBFullHash h) 95 : chunk_id(id), add_chunk_id(add_id), full_hash(h) {} 96 SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {} 97 98 int32 GetAddChunkId() const { return add_chunk_id; } 99 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 100}; 101 102// Determine less-than based on add chunk and prefix. 103template <class T, class U> 104bool SBAddPrefixLess(const T& a, const U& b) { 105 if (a.GetAddChunkId() != b.GetAddChunkId()) 106 return a.GetAddChunkId() < b.GetAddChunkId(); 107 108 return a.GetAddPrefix() < b.GetAddPrefix(); 109} 110 111// Determine less-than based on add chunk, prefix, and full hash. 112// Prefix can compare differently than hash due to byte ordering, 113// so it must take precedence. 114template <class T, class U> 115bool SBAddPrefixHashLess(const T& a, const U& b) { 116 if (SBAddPrefixLess(a, b)) 117 return true; 118 119 if (SBAddPrefixLess(b, a)) 120 return false; 121 122 return memcmp(a.full_hash.full_hash, b.full_hash.full_hash, 123 sizeof(a.full_hash.full_hash)) < 0; 124} 125 126// Process the lists for subs which knock out adds. For any item in 127// |sub_prefixes| which has a match in |add_prefixes|, knock out the 128// matched items from all vectors. Additionally remove items from 129// deleted chunks. 130// 131// TODO(shess): Since the prefixes are uniformly-distributed hashes, 132// there aren't many ways to organize the inputs for efficient 133// processing. For this reason, the vectors are sorted and processed 134// in parallel. At this time this code does the sorting internally, 135// but it might make sense to make sorting an API requirement so that 136// the storage can optimize for it. 137// 138// TODO(shess): The original code did not process |sub_full_hashes| 139// for matches in |add_full_hashes|, so this code doesn't, either. I 140// think this is probably a bug. 141void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes, 142 std::vector<SBSubPrefix>* sub_prefixes, 143 std::vector<SBAddFullHash>* add_full_hashes, 144 std::vector<SBSubFullHash>* sub_full_hashes, 145 const base::hash_set<int32>& add_chunks_deleted, 146 const base::hash_set<int32>& sub_chunks_deleted); 147 148// TODO(shess): This uses int32 rather than int because it's writing 149// specifically-sized items to files. SBPrefix should likewise be 150// explicitly sized. 151 152// Abstract interface for storing data. 153class SafeBrowsingStore { 154 public: 155 SafeBrowsingStore() {} 156 virtual ~SafeBrowsingStore() {} 157 158 // Sets up the information for later use, but does not necessarily 159 // check whether the underlying file exists, or is valid. If 160 // |curruption_callback| is non-NULL it will be called if corruption 161 // is detected, which could happen as part of any call other than 162 // Delete(). The appropriate action is to use Delete() to clear the 163 // store. 164 virtual void Init(const FilePath& filename, 165 Callback0::Type* corruption_callback) = 0; 166 167 // Deletes the files which back the store, returning true if 168 // successful. 169 virtual bool Delete() = 0; 170 171 // Start an update. None of the following methods should be called 172 // unless this returns true. If this returns true, the update 173 // should be terminated by FinishUpdate() or CancelUpdate(). 174 virtual bool BeginUpdate() = 0; 175 176 // Start a chunk of data. None of the methods through FinishChunk() 177 // should be called unless this returns true. 178 // TODO(shess): Would it make sense for this to accept |chunk_id|? 179 // Possibly not, because of possible confusion between sub_chunk_id 180 // and add_chunk_id. 181 virtual bool BeginChunk() = 0; 182 183 virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0; 184 virtual bool WriteAddHash(int32 chunk_id, 185 base::Time receive_time, SBFullHash full_hash) = 0; 186 virtual bool WriteSubPrefix(int32 chunk_id, 187 int32 add_chunk_id, SBPrefix prefix) = 0; 188 virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, 189 SBFullHash full_hash) = 0; 190 191 // Collect the chunk data and preferrably store it on disk to 192 // release memory. Shoul not modify the data in-place. 193 virtual bool FinishChunk() = 0; 194 195 // Track the chunks which have been seen. 196 virtual void SetAddChunk(int32 chunk_id) = 0; 197 virtual bool CheckAddChunk(int32 chunk_id) = 0; 198 virtual void GetAddChunks(std::vector<int32>* out) = 0; 199 virtual void SetSubChunk(int32 chunk_id) = 0; 200 virtual bool CheckSubChunk(int32 chunk_id) = 0; 201 virtual void GetSubChunks(std::vector<int32>* out) = 0; 202 203 // Delete the indicated chunk_id. The chunk will continue to be 204 // visible until the end of the transaction. 205 virtual void DeleteAddChunk(int32 chunk_id) = 0; 206 virtual void DeleteSubChunk(int32 chunk_id) = 0; 207 208 // Pass the collected chunks through SBPRocessSubs() and commit to 209 // permanent storage. The resulting add prefixes and hashes will be 210 // stored in |add_prefixes_result| and |add_full_hashes_result|. 211 // |pending_adds| is the set of full hashes which have been received 212 // since the previous update, and is provided as a convenience 213 // (could be written via WriteAddHash(), but that would flush the 214 // chunk to disk). 215 virtual bool FinishUpdate( 216 const std::vector<SBAddFullHash>& pending_adds, 217 std::vector<SBAddPrefix>* add_prefixes_result, 218 std::vector<SBAddFullHash>* add_full_hashes_result) = 0; 219 220 // Cancel the update in process and remove any temporary disk 221 // storage, leaving the original data unmodified. 222 virtual bool CancelUpdate() = 0; 223 224 private: 225 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore); 226}; 227 228#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 229