// safe_browsing_util.h revision 513209b27ff55e2841eac0e4120199c23acce758
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Utilities for the SafeBrowsing code.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
#pragma once

#include <cstddef>
#include <cstring>
#include <deque>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "chrome/browser/safe_browsing/chunk_range.h"

// Forward declarations of types this header names before (or without)
// defining them.
class GURL;

class SBEntry;

// A truncated hash's type.
typedef int SBPrefix;

// Container for holding a chunk URL and the MAC of the contents of the URL.
struct ChunkUrl {
  std::string url;        // The chunk URL.
  std::string mac;        // MAC of the contents of |url|.
  std::string list_name;  // NOTE(review): presumably the list the chunk
                          // belongs to -- confirm against callers.
};

33// A full hash.
34union SBFullHash {
35  char full_hash[32];
36  SBPrefix prefix;
37};
38
39inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) {
40  return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0;
41}
42
43// Container for information about a specific host in an add/sub chunk.
44struct SBChunkHost {
45  SBPrefix host;
46  SBEntry* entry;
47};
48
49// Container for an add/sub chunk.
50struct SBChunk {
51  SBChunk();
52  ~SBChunk();
53
54  int chunk_number;
55  int list_id;
56  bool is_add;
57  std::deque<SBChunkHost> hosts;
58};
59
60// Container for a set of chunks.  Interim wrapper to replace use of
61// |std::deque<SBChunk>| with something having safer memory semantics.
62// management.
63// TODO(shess): |SBEntry| is currently a very roundabout way to hold
64// things pending storage.  It could be replaced with the structures
65// used in SafeBrowsingStore, then lots of bridging code could
66// dissappear.
67class SBChunkList {
68 public:
69  SBChunkList();
70  ~SBChunkList();
71
72  // Implement that subset of the |std::deque<>| interface which
73  // callers expect.
74  bool empty() const { return chunks_.empty(); }
75  size_t size() { return chunks_.size(); }
76
77  void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
78  SBChunk& back() { return chunks_.back(); }
79  SBChunk& front() { return chunks_.front(); }
80  const SBChunk& front() const { return chunks_.front(); }
81
82  typedef std::vector<SBChunk>::const_iterator const_iterator;
83  const_iterator begin() const { return chunks_.begin(); }
84  const_iterator end() const { return chunks_.end(); }
85
86  typedef std::vector<SBChunk>::iterator iterator;
87  iterator begin() { return chunks_.begin(); }
88  iterator end() { return chunks_.end(); }
89
90  SBChunk& operator[](size_t n) { return chunks_[n]; }
91  const SBChunk& operator[](size_t n) const { return chunks_[n]; }
92
93  // Calls |SBEvent::Destroy()| before clearing |chunks_|.
94  void clear();
95
96 private:
97  std::vector<SBChunk> chunks_;
98
99  DISALLOW_COPY_AND_ASSIGN(SBChunkList);
100};
101
102// Used when we get a gethash response.
103struct SBFullHashResult {
104  SBFullHash hash;
105  std::string list_name;
106  int add_chunk_id;
107};
108
// Contains information about a list in the database.
struct SBListChunkRanges {
  // Explicit, so a std::string is never implicitly converted to this type.
  explicit SBListChunkRanges(const std::string& n);

  std::string name;  // The list name.
  std::string adds;  // The ranges for add chunks.
  std::string subs;  // The ranges for sub chunks.
};

118// Container for deleting chunks from the database.
119struct SBChunkDelete {
120  SBChunkDelete();
121  ~SBChunkDelete();
122
123  std::string list_name;
124  bool is_sub_del;
125  std::vector<ChunkRange> chunk_del;
126};
127
128
129// SBEntry ---------------------------------------------------------------------
130
131// Holds information about the prefixes for a hostkey.  prefixes can either be
132// 4 bytes (truncated hash) or 32 bytes (full hash).
133// For adds:
134//   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
135// For subs:
136//   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
137//       [add chunk][prefix][add chunk][prefix]
138class SBEntry {
139 public:
140  enum Type {
141    ADD_PREFIX,     // 4 byte add entry.
142    SUB_PREFIX,     // 4 byte sub entry.
143    ADD_FULL_HASH,  // 32 byte add entry.
144    SUB_FULL_HASH,  // 32 byte sub entry.
145  };
146
147  // Creates a SBEntry with the necessary size for the given number of prefixes.
148  // Caller ownes the object and needs to free it by calling Destroy.
149  static SBEntry* Create(Type type, int prefix_count);
150
151  // Frees the entry's memory.
152  void Destroy();
153
154  void set_list_id(int list_id) { data_.list_id = list_id; }
155  int list_id() const { return data_.list_id; }
156  void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
157  int chunk_id() const { return data_.chunk_id; }
158  int prefix_count() const { return data_.prefix_count; }
159
160  // Returns a new entry that is larger by the given number of prefixes, with
161  // all the existing data already copied over.  The old entry is destroyed.
162  SBEntry* Enlarge(int extra_prefixes);
163
164  // Returns true if this is a prefix as opposed to a full hash.
165  bool IsPrefix() const {
166    return type() == ADD_PREFIX || type() == SUB_PREFIX;
167  }
168
169  // Returns true if this is an add entry.
170  bool IsAdd() const {
171    return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
172  }
173
174  // Returns true if this is a sub entry.
175  bool IsSub() const {
176    return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
177  }
178
179  // Helper to return the size of the prefixes.
180  int HashLen() const {
181    return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
182  }
183
184  // For add entries, returns the add chunk id.  For sub entries, returns the
185  // add_chunk id for the prefix at the given index.
186  int ChunkIdAtPrefix(int index) const;
187
188  // Used for sub chunks to set the chunk id at a given index.
189  void SetChunkIdAtPrefix(int index, int chunk_id);
190
191  // Return the prefix/full hash at the given index.  Caller is expected to
192  // call the right function based on the hash length.
193  const SBPrefix& PrefixAt(int index) const;
194  const SBFullHash& FullHashAt(int index) const;
195
196  // Return the prefix/full hash at the given index.  Caller is expected to
197  // call the right function based on the hash length.
198  void SetPrefixAt(int index, const SBPrefix& prefix);
199  void SetFullHashAt(int index, const SBFullHash& full_hash);
200
201 private:
202  // Container for a sub prefix.
203  struct SBSubPrefix {
204    int add_chunk;
205    SBPrefix prefix;
206  };
207
208  // Container for a sub full hash.
209  struct SBSubFullHash {
210    int add_chunk;
211    SBFullHash prefix;
212  };
213
214  // Keep the fixed data together in one struct so that we can get its size
215  // easily.  If any of this is modified, the database will have to be cleared.
216  struct Data {
217    int list_id;
218    // For adds, this is the add chunk number.
219    // For subs: if prefix_count is 0 then this is the add chunk that this sub
220    //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
221    //     or sub_full_hashes is used for each corresponding prefix.
222    int chunk_id;
223    Type type;
224    int prefix_count;
225  };
226
227  SBEntry();
228  ~SBEntry();
229
230  // Helper to return the size of each prefix entry (i.e. for subs this
231  // includes an add chunk id).
232  static int PrefixSize(Type type);
233
234  // Helper to return how much memory a given Entry would require.
235  static int Size(Type type, int prefix_count);
236
237  // Returns how many bytes this entry is.
238  int Size() const;
239
240  Type type() const { return data_.type; }
241
242  void set_prefix_count(int count) { data_.prefix_count = count; }
243  void set_type(Type type) { data_.type = type; }
244
245  // The prefixes union must follow the fixed data so that they're contiguous
246  // in memory.
247  Data data_;
248  union {
249    SBPrefix add_prefixes_[1];
250    SBSubPrefix sub_prefixes_[1];
251    SBFullHash add_full_hashes_[1];
252    SBSubFullHash sub_full_hashes_[1];
253  };
254};
255
256
257// Utility functions -----------------------------------------------------------
258
259namespace safe_browsing_util {
260
261// SafeBrowsing list names.
262extern const char kMalwareList[];
263extern const char kPhishingList[];
264
265// Converts between the SafeBrowsing list names and their enumerated value.
266// If the list names change, both of these methods must be updated.
267enum ListType {
268  INVALID = -1,
269  MALWARE = 0,
270  PHISH = 1,
271};
272int GetListId(const std::string& name);
273std::string GetListName(int list_id);
274
275// Canonicalizes url as per Google Safe Browsing Specification.
276// See section 6.1 in
277// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
278void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
279                     std::string* canonicalized_path,
280                     std::string* canonicalized_query);
281
282// Given a URL, returns all the hosts we need to check.  They are returned
283// in order of size (i.e. b.c is first, then a.b.c).
284void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
285
286// Given a URL, returns all the paths we need to check.
287void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
288
289// Given a URL, compare all the possible host + path full hashes to the set of
290// provided full hashes.  Returns the index of the match if one is found, or -1
291// otherwise.
292int CompareFullHashes(const GURL& url,
293                      const std::vector<SBFullHashResult>& full_hashes);
294
295bool IsPhishingList(const std::string& list_name);
296bool IsMalwareList(const std::string& list_name);
297
298// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
299bool VerifyMAC(const std::string& key,
300               const std::string& mac,
301               const char* data,
302               int data_length);
303
304GURL GeneratePhishingReportUrl(const std::string& report_page,
305                               const std::string& url_to_report);
306
307}  // namespace safe_browsing_util
308
#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
