// safe_browsing_util.h revision dc0f95d653279beabeb9817299e2902918ba123e
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Utilities for the SafeBrowsing code.
6
7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
9#pragma once
10
#include <cstddef>
#include <cstring>
#include <deque>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "chrome/browser/safe_browsing/chunk_range.h"
18
// Forward declarations.  This header only names these types in pointer,
// reference and function-declaration positions before (or without) their
// full definitions.
class GURL;
class SBEntry;
22
// A truncated hash's type.  It overlays the leading bytes of a full hash
// (see the |prefix| member of SBFullHash).
typedef int SBPrefix;

// Container for holding a chunk URL and the MAC of the contents of the URL.
struct ChunkUrl {
  std::string url;        // Location of the chunk.
  std::string mac;        // MAC of the contents found at |url|.
  std::string list_name;  // Presumably the list this chunk belongs to;
                          // TODO: confirm against callers.
};

// A full hash.  The union allows the same storage to be addressed either as
// the raw 32 hash bytes (|full_hash|) or through its leading prefix
// (|prefix|).
union SBFullHash {
  char full_hash[32];
  SBPrefix prefix;
};

// Two full hashes compare equal when every byte of |full_hash| matches.
inline bool operator==(const SBFullHash& lhs, const SBFullHash& rhs) {
  // <cstring> only guarantees the std:: spelling of memcmp.
  return std::memcmp(lhs.full_hash, rhs.full_hash,
                     sizeof(lhs.full_hash)) == 0;
}
42
43// Container for information about a specific host in an add/sub chunk.
44struct SBChunkHost {
45  SBPrefix host;
46  SBEntry* entry;
47};
48
49// Container for an add/sub chunk.
50struct SBChunk {
51  SBChunk();
52  ~SBChunk();
53
54  int chunk_number;
55  int list_id;
56  bool is_add;
57  std::deque<SBChunkHost> hosts;
58};
59
60// Container for a set of chunks.  Interim wrapper to replace use of
61// |std::deque<SBChunk>| with something having safer memory semantics.
62// management.
63// TODO(shess): |SBEntry| is currently a very roundabout way to hold
64// things pending storage.  It could be replaced with the structures
65// used in SafeBrowsingStore, then lots of bridging code could
66// dissappear.
67class SBChunkList {
68 public:
69  SBChunkList();
70  ~SBChunkList();
71
72  // Implement that subset of the |std::deque<>| interface which
73  // callers expect.
74  bool empty() const { return chunks_.empty(); }
75  size_t size() { return chunks_.size(); }
76
77  void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
78  SBChunk& back() { return chunks_.back(); }
79  SBChunk& front() { return chunks_.front(); }
80  const SBChunk& front() const { return chunks_.front(); }
81
82  typedef std::vector<SBChunk>::const_iterator const_iterator;
83  const_iterator begin() const { return chunks_.begin(); }
84  const_iterator end() const { return chunks_.end(); }
85
86  typedef std::vector<SBChunk>::iterator iterator;
87  iterator begin() { return chunks_.begin(); }
88  iterator end() { return chunks_.end(); }
89
90  SBChunk& operator[](size_t n) { return chunks_[n]; }
91  const SBChunk& operator[](size_t n) const { return chunks_[n]; }
92
93  // Calls |SBEvent::Destroy()| before clearing |chunks_|.
94  void clear();
95
96 private:
97  std::vector<SBChunk> chunks_;
98
99  DISALLOW_COPY_AND_ASSIGN(SBChunkList);
100};
101
102// Used when we get a gethash response.
103struct SBFullHashResult {
104  SBFullHash hash;
105  std::string list_name;
106  int add_chunk_id;
107};
108
// Contains information about a list in the database.
struct SBListChunkRanges {
  // |n| is the list name.  Explicit, so a string is never silently converted
  // into an SBListChunkRanges.
  explicit SBListChunkRanges(const std::string& n);

  std::string name;  // The list name.
  std::string adds;  // The ranges for add chunks.
  std::string subs;  // The ranges for sub chunks.
};
117
118// Container for deleting chunks from the database.
119struct SBChunkDelete {
120  SBChunkDelete();
121  ~SBChunkDelete();
122
123  std::string list_name;
124  bool is_sub_del;
125  std::vector<ChunkRange> chunk_del;
126};
127
128
129// SBEntry ---------------------------------------------------------------------
130
131// Holds information about the prefixes for a hostkey.  prefixes can either be
132// 4 bytes (truncated hash) or 32 bytes (full hash).
133// For adds:
134//   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
135// For subs:
136//   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
137//       [add chunk][prefix][add chunk][prefix]
138class SBEntry {
139 public:
140  enum Type {
141    ADD_PREFIX,     // 4 byte add entry.
142    SUB_PREFIX,     // 4 byte sub entry.
143    ADD_FULL_HASH,  // 32 byte add entry.
144    SUB_FULL_HASH,  // 32 byte sub entry.
145  };
146
147  // Creates a SBEntry with the necessary size for the given number of prefixes.
148  // Caller ownes the object and needs to free it by calling Destroy.
149  static SBEntry* Create(Type type, int prefix_count);
150
151  // Frees the entry's memory.
152  void Destroy();
153
154  void set_list_id(int list_id) { data_.list_id = list_id; }
155  int list_id() const { return data_.list_id; }
156  void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
157  int chunk_id() const { return data_.chunk_id; }
158  int prefix_count() const { return data_.prefix_count; }
159
160  // Returns true if this is a prefix as opposed to a full hash.
161  bool IsPrefix() const {
162    return type() == ADD_PREFIX || type() == SUB_PREFIX;
163  }
164
165  // Returns true if this is an add entry.
166  bool IsAdd() const {
167    return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
168  }
169
170  // Returns true if this is a sub entry.
171  bool IsSub() const {
172    return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
173  }
174
175  // Helper to return the size of the prefixes.
176  int HashLen() const {
177    return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
178  }
179
180  // For add entries, returns the add chunk id.  For sub entries, returns the
181  // add_chunk id for the prefix at the given index.
182  int ChunkIdAtPrefix(int index) const;
183
184  // Used for sub chunks to set the chunk id at a given index.
185  void SetChunkIdAtPrefix(int index, int chunk_id);
186
187  // Return the prefix/full hash at the given index.  Caller is expected to
188  // call the right function based on the hash length.
189  const SBPrefix& PrefixAt(int index) const;
190  const SBFullHash& FullHashAt(int index) const;
191
192  // Return the prefix/full hash at the given index.  Caller is expected to
193  // call the right function based on the hash length.
194  void SetPrefixAt(int index, const SBPrefix& prefix);
195  void SetFullHashAt(int index, const SBFullHash& full_hash);
196
197 private:
198  // Container for a sub prefix.
199  struct SBSubPrefix {
200    int add_chunk;
201    SBPrefix prefix;
202  };
203
204  // Container for a sub full hash.
205  struct SBSubFullHash {
206    int add_chunk;
207    SBFullHash prefix;
208  };
209
210  // Keep the fixed data together in one struct so that we can get its size
211  // easily.  If any of this is modified, the database will have to be cleared.
212  struct Data {
213    int list_id;
214    // For adds, this is the add chunk number.
215    // For subs: if prefix_count is 0 then this is the add chunk that this sub
216    //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
217    //     or sub_full_hashes is used for each corresponding prefix.
218    int chunk_id;
219    Type type;
220    int prefix_count;
221  };
222
223  SBEntry();
224  ~SBEntry();
225
226  // Helper to return the size of each prefix entry (i.e. for subs this
227  // includes an add chunk id).
228  static int PrefixSize(Type type);
229
230  // Helper to return how much memory a given Entry would require.
231  static int Size(Type type, int prefix_count);
232
233  // Returns how many bytes this entry is.
234  int Size() const;
235
236  Type type() const { return data_.type; }
237
238  void set_prefix_count(int count) { data_.prefix_count = count; }
239  void set_type(Type type) { data_.type = type; }
240
241  // The prefixes union must follow the fixed data so that they're contiguous
242  // in memory.
243  Data data_;
244  union {
245    SBPrefix add_prefixes_[1];
246    SBSubPrefix sub_prefixes_[1];
247    SBFullHash add_full_hashes_[1];
248    SBSubFullHash sub_full_hashes_[1];
249  };
250};
251
252
253// Utility functions -----------------------------------------------------------
254
255namespace safe_browsing_util {
256
257// SafeBrowsing list names.
258extern const char kMalwareList[];
259extern const char kPhishingList[];
260// Binary Download list names.
261extern const char kBinUrlList[];
262extern const char kBinHashList[];
263
264enum ListType {
265  INVALID = -1,
266  MALWARE = 0,
267  PHISH = 1,
268  BINURL = 2,
269  BINHASH = 3,
270};
271
272// Maps a list name to ListType.
273int GetListId(const std::string& name);
274// Maps a ListId to list name. Return false if fails.
275bool GetListName(int list_id, std::string* list);
276
277
278// Canonicalizes url as per Google Safe Browsing Specification.
279// See section 6.1 in
280// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
281void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
282                     std::string* canonicalized_path,
283                     std::string* canonicalized_query);
284
285// Given a URL, returns all the hosts we need to check.  They are returned
286// in order of size (i.e. b.c is first, then a.b.c).
287void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
288
289// Given a URL, returns all the paths we need to check.
290void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
291
292int GetHashIndex(const SBFullHash& hash,
293                 const std::vector<SBFullHashResult>& full_hashes);
294
295// Given a URL, compare all the possible host + path full hashes to the set of
296// provided full hashes.  Returns the index of the match if one is found, or -1
297// otherwise.
298int GetUrlHashIndex(const GURL& url,
299                    const std::vector<SBFullHashResult>& full_hashes);
300
301bool IsPhishingList(const std::string& list_name);
302bool IsMalwareList(const std::string& list_name);
303bool IsBadbinurlList(const std::string& list_name);
304bool IsBadbinhashList(const std::string& list_name);
305
306// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
307bool VerifyMAC(const std::string& key,
308               const std::string& mac,
309               const char* data,
310               int data_length);
311
312GURL GeneratePhishingReportUrl(const std::string& report_page,
313                               const std::string& url_to_report);
314
315void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out);
316
317}  // namespace safe_browsing_util
318
319#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
320