1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
6#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
7
#include <string.h>

#include <deque>
#include <set>
#include <vector>

#include "base/basictypes.h"
#include "base/callback_forward.h"
#include "base/containers/hash_tables.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"
18
19namespace base {
20class FilePath;
21}
22
23// SafeBrowsingStore provides a storage abstraction for the
24// safe-browsing data used to build the bloom filter.  The items
25// stored are:
26//   The set of add and sub chunks seen.
27//   List of SBAddPrefix (chunk_id and SBPrefix).
28//   List of SBSubPrefix (chunk_id and the target SBAddPrefix).
//   List of SBAddFullHash (chunk_id, a deprecated received time, and an
//   SBFullHash).
30//   List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
31//
32// The store is geared towards updating the data, not runtime access
33// to the data (that is handled by SafeBrowsingDatabase).  Updates are
34// handled similar to a SQL transaction cycle, with the new data being
35// returned from FinishUpdate() (the COMMIT).  Data is not persistent
36// until FinishUpdate() returns successfully.
37//
// FinishUpdate() also handles dropping items whose chunk has been
// deleted, and netting out the add/sub lists (when a sub matches an
// add, both are dropped).
41
// GetAddChunkId() and GetAddPrefix() are exposed so that these items
// can be generically compared with each other by SBAddPrefixLess() and
// SBAddPrefixHashLess().  SBAddPrefixHashLess() additionally reads the
// |full_hash| member directly, so it only applies to the full-hash types.
45
46struct SBAddPrefix {
47  int32 chunk_id;
48  SBPrefix prefix;
49
50  SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {}
51  SBAddPrefix() : chunk_id(), prefix() {}
52
53  int32 GetAddChunkId() const { return chunk_id; }
54  SBPrefix GetAddPrefix() const { return prefix; }
55};
56
// Container type used for lists of SBAddPrefix.  Currently a std::deque.
// TODO(shess): Measure the performance impact of switching this back to
// std::vector<> once the v8 file format dominates.  Also SBSubPrefixes.
typedef std::deque<SBAddPrefix> SBAddPrefixes;
60
61struct SBSubPrefix {
62  int32 chunk_id;
63  int32 add_chunk_id;
64  SBPrefix add_prefix;
65
66  SBSubPrefix(int32 id, int32 add_id, SBPrefix prefix)
67      : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {}
68  SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {}
69
70  int32 GetAddChunkId() const { return add_chunk_id; }
71  SBPrefix GetAddPrefix() const { return add_prefix; }
72};
73
// Container type used for lists of SBSubPrefix.  Currently a std::deque.
typedef std::deque<SBSubPrefix> SBSubPrefixes;
75
76struct SBAddFullHash {
77  int32 chunk_id;
78  // Received field is not used anymore, but is kept for DB compatability.
79  // TODO(shess): Deprecate and remove.
80  int32 deprecated_received;
81  SBFullHash full_hash;
82
83  SBAddFullHash(int32 id, const SBFullHash& h)
84      : chunk_id(id), deprecated_received(), full_hash(h) {}
85
86  SBAddFullHash() : chunk_id(), deprecated_received(), full_hash() {}
87
88  int32 GetAddChunkId() const { return chunk_id; }
89  SBPrefix GetAddPrefix() const { return full_hash.prefix; }
90};
91
92struct SBSubFullHash {
93  int32 chunk_id;
94  int32 add_chunk_id;
95  SBFullHash full_hash;
96
97  SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h)
98      : chunk_id(id), add_chunk_id(add_id), full_hash(h) {}
99  SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {}
100
101  int32 GetAddChunkId() const { return add_chunk_id; }
102  SBPrefix GetAddPrefix() const { return full_hash.prefix; }
103};
104
// Strict less-than over (add prefix, add chunk id).  The prefix is the
// primary key; the add chunk id only breaks ties between equal prefixes.
// |T| and |U| may differ as long as both provide GetAddPrefix() and
// GetAddChunkId().
template <class T, class U>
bool SBAddPrefixLess(const T& a, const U& b) {
  return (a.GetAddPrefix() != b.GetAddPrefix())
             ? a.GetAddPrefix() < b.GetAddPrefix()
             : a.GetAddChunkId() < b.GetAddChunkId();
}
113
// Determine less-than based on prefix, add chunk, and full hash.
// Prefix can compare differently than hash due to byte ordering,
// so it must take precedence: the raw hash bytes are consulted only
// when neither item orders before the other under SBAddPrefixLess().
//
// |T| and |U| must each expose a |full_hash| member whose own
// |full_hash| field holds the hash bytes.  The size of |a|'s field
// determines how many bytes are compared.
//
// memcmp() comes from <string.h>.  The declaration has to be visible at
// this point because the call sits inside a template.
template <class T, class U>
bool SBAddPrefixHashLess(const T& a, const U& b) {
  // (prefix, add chunk) decides whenever the two items differ on it.
  const bool a_orders_first = SBAddPrefixLess(a, b);
  if (a_orders_first || SBAddPrefixLess(b, a))
    return a_orders_first;

  // Same prefix and add chunk: order by the raw hash bytes.
  return memcmp(a.full_hash.full_hash, b.full_hash.full_hash,
                sizeof(a.full_hash.full_hash)) < 0;
}
128
// Process the lists for subs which knock out adds.  For any item in
// |sub_prefixes| which has a match in |add_prefixes|, knock out the
// matched items from all four containers.  Additionally remove items
// from deleted chunks.
//
//   add_prefixes, sub_prefixes       - prefix lists, modified in place.
//   add_full_hashes, sub_full_hashes - full-hash lists, modified in place.
//   add_chunks_deleted               - ids of add chunks being deleted.
//   sub_chunks_deleted               - ids of sub chunks being deleted.
//
// The inputs must be sorted by SBAddPrefixLess or SBAddPrefixHashLess.
// NOTE(review): presumably the prefix lists are ordered by
// SBAddPrefixLess and the full-hash lists by SBAddPrefixHashLess;
// confirm against the definition.
void SBProcessSubs(SBAddPrefixes* add_prefixes,
                   SBSubPrefixes* sub_prefixes,
                   std::vector<SBAddFullHash>* add_full_hashes,
                   std::vector<SBSubFullHash>* sub_full_hashes,
                   const base::hash_set<int32>& add_chunks_deleted,
                   const base::hash_set<int32>& sub_chunks_deleted);
141
// Abstract interface for storing safe-browsing data.  Every operation is
// pure virtual; concrete stores supply the storage.  Instances are not
// copyable.
class SafeBrowsingStore {
 public:
  SafeBrowsingStore() {}
  virtual ~SafeBrowsingStore() {}

  // Sets up the information for later use, but does not necessarily
  // check whether the underlying file exists, or is valid.  If
  // |corruption_callback| is non-NULL it will be called if corruption
  // is detected, which could happen as part of any call other than
  // Delete().  The appropriate action is to use Delete() to clear the
  // store.
  virtual void Init(const base::FilePath& filename,
                    const base::Closure& corruption_callback) = 0;

  // Deletes the files which back the store, returning true if
  // successful.
  virtual bool Delete() = 0;

  // Get all add prefixes out from the store into |add_prefixes|.
  // NOTE(review): presumably returns true on success; confirm against
  // the implementations.
  virtual bool GetAddPrefixes(SBAddPrefixes* add_prefixes) = 0;

  // Get all add full-length hashes out from the store into
  // |add_full_hashes|.
  // NOTE(review): presumably returns true on success; confirm against
  // the implementations.
  virtual bool GetAddFullHashes(
      std::vector<SBAddFullHash>* add_full_hashes) = 0;

  // Start an update.  None of the following methods should be called
  // unless this returns true.  If this returns true, the update
  // should be terminated by FinishUpdate() or CancelUpdate().
  virtual bool BeginUpdate() = 0;

  // Start a chunk of data.  None of the methods through FinishChunk()
  // should be called unless this returns true.
  // TODO(shess): Would it make sense for this to accept |chunk_id|?
  // Possibly not, because of possible confusion between sub_chunk_id
  // and add_chunk_id.
  virtual bool BeginChunk() = 0;

  // Add one item to the chunk in progress.  For the sub variants,
  // |chunk_id| is the sub chunk and |add_chunk_id| names the add chunk
  // being targeted.
  // NOTE(review): presumably each returns false if the item could not
  // be recorded; confirm against the implementations.
  virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0;
  virtual bool WriteAddHash(int32 chunk_id,
                            const SBFullHash& full_hash) = 0;
  virtual bool WriteSubPrefix(int32 chunk_id,
                              int32 add_chunk_id, SBPrefix prefix) = 0;
  virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
                            const SBFullHash& full_hash) = 0;

  // Collect the chunk data and preferably store it on disk to
  // release memory.  Should not modify the data in-place.
  virtual bool FinishChunk() = 0;

  // Track the chunks which have been seen.  Set*Chunk() records an id,
  // Check*Chunk() reports whether an id has been recorded, and
  // Get*Chunks() writes the recorded ids to |out|.
  virtual void SetAddChunk(int32 chunk_id) = 0;
  virtual bool CheckAddChunk(int32 chunk_id) = 0;
  virtual void GetAddChunks(std::vector<int32>* out) = 0;
  virtual void SetSubChunk(int32 chunk_id) = 0;
  virtual bool CheckSubChunk(int32 chunk_id) = 0;
  virtual void GetSubChunks(std::vector<int32>* out) = 0;

  // Delete the indicated chunk_id.  The chunk will continue to be
  // visible until the end of the transaction.
  virtual void DeleteAddChunk(int32 chunk_id) = 0;
  virtual void DeleteSubChunk(int32 chunk_id) = 0;

  // May be called during update to verify that the storage is valid.
  // Return true if the store seems valid.  If corruption is detected,
  // calls the corruption callback and return false.
  // NOTE(shess): When storage was SQLite, there was no guarantee that
  // a structurally sound database actually contained valid data,
  // whereas SafeBrowsingStoreFile checksums the data.  For now, this
  // distinction doesn't matter.
  virtual bool CheckValidity() = 0;

  // Pass the collected chunks through SBProcessSubs() and commit to
  // permanent storage.  The resulting add full hashes will be stored
  // in |add_full_hashes_result|.
  // NOTE(review): the resulting add prefixes are presumably fed to
  // |builder| rather than returned as a list; confirm against the
  // implementations.
  virtual bool FinishUpdate(
      safe_browsing::PrefixSetBuilder* builder,
      std::vector<SBAddFullHash>* add_full_hashes_result) = 0;

  // Cancel the update in process and remove any temporary disk
  // storage, leaving the original data unmodified.
  virtual bool CancelUpdate() = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore);
};
228
229#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
230