safe_browsing_store.h revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
6#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
7#pragma once
8
9#include <vector>
10
11#include "base/basictypes.h"
12#include "base/callback.h"
13#include "base/hash_tables.h"
14#include "base/task.h"
15#include "base/time.h"
16#include "chrome/browser/safe_browsing/safe_browsing_util.h"
17
18class FilePath;
19
20// SafeBrowsingStore provides a storage abstraction for the
21// safe-browsing data used to build the bloom filter.  The items
22// stored are:
23//   The set of add and sub chunks seen.
24//   List of SBAddPrefix (chunk_id and SBPrefix).
25//   List of SBSubPrefix (chunk_id and the target SBAddPrefix).
26//   List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
27//   List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
28//
29// The store is geared towards updating the data, not runtime access
30// to the data (that is handled by SafeBrowsingDatabase).  Updates are
31// handled similar to a SQL transaction cycle, with the new data being
32// returned from FinishUpdate() (the COMMIT).  Data is not persistent
33// until FinishUpdate() returns successfully.
34//
35// FinishUpdate() also handles dropping items who's chunk has been
36// deleted, and netting out the add/sub lists (when a sub matches an
37// add, both are dropped).
38
39// GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so
40// that these items can be generically compared with each other by
41// SBAddPrefixLess() and SBAddPrefixHashLess().
42
43struct SBAddPrefix {
44  int32 chunk_id;
45  SBPrefix prefix;
46
47  SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {}
48  SBAddPrefix() : chunk_id(), prefix() {}
49
50  int32 GetAddChunkId() const { return chunk_id; }
51  SBPrefix GetAddPrefix() const { return prefix; }
52};
53
54struct SBSubPrefix {
55  int32 chunk_id;
56  int32 add_chunk_id;
57  SBPrefix add_prefix;
58
59  SBSubPrefix(int32 id, int32 add_id, int prefix)
60      : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {}
61  SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {}
62
63  int32 GetAddChunkId() const { return add_chunk_id; }
64  SBPrefix GetAddPrefix() const { return add_prefix; }
65};
66
67struct SBAddFullHash {
68  int32 chunk_id;
69  int32 received;
70  SBFullHash full_hash;
71
72  SBAddFullHash(int32 id, base::Time r, const SBFullHash& h)
73      : chunk_id(id),
74        received(static_cast<int32>(r.ToTimeT())),
75        full_hash(h) {
76  }
77
78  // Provided for ReadAddHashes() implementations, which already have
79  // an int32 for the time.
80  SBAddFullHash(int32 id, int32 r, const SBFullHash& h)
81      : chunk_id(id), received(r), full_hash(h) {}
82
83  SBAddFullHash() : chunk_id(), received(), full_hash() {}
84
85  int32 GetAddChunkId() const { return chunk_id; }
86  SBPrefix GetAddPrefix() const { return full_hash.prefix; }
87};
88
89struct SBSubFullHash {
90  int32 chunk_id;
91  int32 add_chunk_id;
92  SBFullHash full_hash;
93
94  SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h)
95      : chunk_id(id), add_chunk_id(add_id), full_hash(h) {}
96  SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {}
97
98  int32 GetAddChunkId() const { return add_chunk_id; }
99  SBPrefix GetAddPrefix() const { return full_hash.prefix; }
100};
101
102// Determine less-than based on add chunk and prefix.
103template <class T, class U>
104bool SBAddPrefixLess(const T& a, const U& b) {
105  if (a.GetAddChunkId() != b.GetAddChunkId())
106    return a.GetAddChunkId() < b.GetAddChunkId();
107
108  return a.GetAddPrefix() < b.GetAddPrefix();
109}
110
111// Determine less-than based on add chunk, prefix, and full hash.
112// Prefix can compare differently than hash due to byte ordering,
113// so it must take precedence.
114template <class T, class U>
115bool SBAddPrefixHashLess(const T& a, const U& b) {
116  if (SBAddPrefixLess(a, b))
117    return true;
118
119  if (SBAddPrefixLess(b, a))
120    return false;
121
122  return memcmp(a.full_hash.full_hash, b.full_hash.full_hash,
123                sizeof(a.full_hash.full_hash)) < 0;
124}
125
126// Process the lists for subs which knock out adds.  For any item in
127// |sub_prefixes| which has a match in |add_prefixes|, knock out the
128// matched items from all vectors.  Additionally remove items from
129// deleted chunks.
130//
131// TODO(shess): Since the prefixes are uniformly-distributed hashes,
132// there aren't many ways to organize the inputs for efficient
133// processing.  For this reason, the vectors are sorted and processed
134// in parallel.  At this time this code does the sorting internally,
135// but it might make sense to make sorting an API requirement so that
136// the storage can optimize for it.
137//
138// TODO(shess): The original code did not process |sub_full_hashes|
139// for matches in |add_full_hashes|, so this code doesn't, either.  I
140// think this is probably a bug.
141void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes,
142                   std::vector<SBSubPrefix>* sub_prefixes,
143                   std::vector<SBAddFullHash>* add_full_hashes,
144                   std::vector<SBSubFullHash>* sub_full_hashes,
145                   const base::hash_set<int32>& add_chunks_deleted,
146                   const base::hash_set<int32>& sub_chunks_deleted);
147
148// TODO(shess): This uses int32 rather than int because it's writing
149// specifically-sized items to files.  SBPrefix should likewise be
150// explicitly sized.
151
152// Abstract interface for storing data.
153class SafeBrowsingStore {
154 public:
155  SafeBrowsingStore() {}
156  virtual ~SafeBrowsingStore() {}
157
158  // Sets up the information for later use, but does not necessarily
159  // check whether the underlying file exists, or is valid.  If
160  // |curruption_callback| is non-NULL it will be called if corruption
161  // is detected, which could happen as part of any call other than
162  // Delete().  The appropriate action is to use Delete() to clear the
163  // store.
164  virtual void Init(const FilePath& filename,
165                    Callback0::Type* corruption_callback) = 0;
166
167  // Deletes the files which back the store, returning true if
168  // successful.
169  virtual bool Delete() = 0;
170
171  // Start an update.  None of the following methods should be called
172  // unless this returns true.  If this returns true, the update
173  // should be terminated by FinishUpdate() or CancelUpdate().
174  virtual bool BeginUpdate() = 0;
175
176  // Start a chunk of data.  None of the methods through FinishChunk()
177  // should be called unless this returns true.
178  // TODO(shess): Would it make sense for this to accept |chunk_id|?
179  // Possibly not, because of possible confusion between sub_chunk_id
180  // and add_chunk_id.
181  virtual bool BeginChunk() = 0;
182
183  virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0;
184  virtual bool WriteAddHash(int32 chunk_id,
185                            base::Time receive_time,
186                            const SBFullHash& full_hash) = 0;
187  virtual bool WriteSubPrefix(int32 chunk_id,
188                              int32 add_chunk_id, SBPrefix prefix) = 0;
189  virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
190                            const SBFullHash& full_hash) = 0;
191
192  // Collect the chunk data and preferrably store it on disk to
193  // release memory.  Shoul not modify the data in-place.
194  virtual bool FinishChunk() = 0;
195
196  // Track the chunks which have been seen.
197  virtual void SetAddChunk(int32 chunk_id) = 0;
198  virtual bool CheckAddChunk(int32 chunk_id) = 0;
199  virtual void GetAddChunks(std::vector<int32>* out) = 0;
200  virtual void SetSubChunk(int32 chunk_id) = 0;
201  virtual bool CheckSubChunk(int32 chunk_id) = 0;
202  virtual void GetSubChunks(std::vector<int32>* out) = 0;
203
204  // Delete the indicated chunk_id.  The chunk will continue to be
205  // visible until the end of the transaction.
206  virtual void DeleteAddChunk(int32 chunk_id) = 0;
207  virtual void DeleteSubChunk(int32 chunk_id) = 0;
208
209  // Pass the collected chunks through SBPRocessSubs() and commit to
210  // permanent storage.  The resulting add prefixes and hashes will be
211  // stored in |add_prefixes_result| and |add_full_hashes_result|.
212  // |pending_adds| is the set of full hashes which have been received
213  // since the previous update, and is provided as a convenience
214  // (could be written via WriteAddHash(), but that would flush the
215  // chunk to disk).
216  virtual bool FinishUpdate(
217      const std::vector<SBAddFullHash>& pending_adds,
218      std::vector<SBAddPrefix>* add_prefixes_result,
219      std::vector<SBAddFullHash>* add_full_hashes_result) = 0;
220
221  // Cancel the update in process and remove any temporary disk
222  // storage, leaving the original data unmodified.
223  virtual bool CancelUpdate() = 0;
224
225 private:
226  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore);
227};
228
229#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
230