safe_browsing_util.h revision 731df977c0511bca2206b5f333555b1205ff1f43
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Utilities for the SafeBrowsing code.
6
7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
9#pragma once
10
11#include <cstring>
12#include <deque>
13#include <string>
14#include <vector>
15
16#include "base/basictypes.h"
17#include "chrome/browser/safe_browsing/chunk_range.h"
18
// GURL is only used in declarations in this header, so a forward declaration
// is enough.
class GURL;

// SB_DLOG(severity) expands to DLOG_IF with a constant condition: 1 when
// SB_LOGGING_ENABLED is defined, 0 otherwise.  Note that the |severity|
// argument is not forwarded; the expansion always names INFO.
#if defined(SB_LOGGING_ENABLED)
#define SB_DLOG(severity) DLOG_IF(INFO, 1)
#else
#define SB_DLOG(severity) DLOG_IF(INFO, 0)
#endif

// Defined further down in this header.
class SBEntry;

// The type used for a truncated hash (a prefix).
typedef int SBPrefix;
31
// Bundles a chunk URL with the MAC of that URL's contents, plus a list name.
struct ChunkUrl {
  std::string url;        // The chunk URL.
  std::string mac;        // MAC of the contents found at |url|.
  std::string list_name;  // A list name.
};
38
39// A full hash.
40union SBFullHash {
41  char full_hash[32];
42  SBPrefix prefix;
43};
44
45inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) {
46  return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0;
47}
48
49// Container for information about a specific host in an add/sub chunk.
50struct SBChunkHost {
51  SBPrefix host;
52  SBEntry* entry;
53};
54
55// Container for an add/sub chunk.
56struct SBChunk {
57  SBChunk();
58  ~SBChunk();
59
60  int chunk_number;
61  int list_id;
62  bool is_add;
63  std::deque<SBChunkHost> hosts;
64};
65
66// Container for a set of chunks.  Interim wrapper to replace use of
67// |std::deque<SBChunk>| with something having safer memory semantics.
68// management.
69// TODO(shess): |SBEntry| is currently a very roundabout way to hold
70// things pending storage.  It could be replaced with the structures
71// used in SafeBrowsingStore, then lots of bridging code could
72// dissappear.
73class SBChunkList {
74 public:
75  SBChunkList();
76  ~SBChunkList();
77
78  // Implement that subset of the |std::deque<>| interface which
79  // callers expect.
80  bool empty() const { return chunks_.empty(); }
81  size_t size() { return chunks_.size(); }
82
83  void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
84  SBChunk& back() { return chunks_.back(); }
85  SBChunk& front() { return chunks_.front(); }
86  const SBChunk& front() const { return chunks_.front(); }
87
88  typedef std::vector<SBChunk>::const_iterator const_iterator;
89  const_iterator begin() const { return chunks_.begin(); }
90  const_iterator end() const { return chunks_.end(); }
91
92  typedef std::vector<SBChunk>::iterator iterator;
93  iterator begin() { return chunks_.begin(); }
94  iterator end() { return chunks_.end(); }
95
96  SBChunk& operator[](size_t n) { return chunks_[n]; }
97  const SBChunk& operator[](size_t n) const { return chunks_[n]; }
98
99  // Calls |SBEvent::Destroy()| before clearing |chunks_|.
100  void clear();
101
102 private:
103  std::vector<SBChunk> chunks_;
104
105  DISALLOW_COPY_AND_ASSIGN(SBChunkList);
106};
107
108// Used when we get a gethash response.
109struct SBFullHashResult {
110  SBFullHash hash;
111  std::string list_name;
112  int add_chunk_id;
113};
114
// Describes one list in the database: its name and the chunk ranges it holds,
// all kept as strings.
struct SBListChunkRanges {
  // Defined out of line; |n| is presumably used as the list name.
  explicit SBListChunkRanges(const std::string& n);

  // The list name.
  std::string name;
  // The ranges for add chunks.
  std::string adds;
  // The ranges for sub chunks.
  std::string subs;
};
123
124// Container for deleting chunks from the database.
125struct SBChunkDelete {
126  SBChunkDelete();
127  ~SBChunkDelete();
128
129  std::string list_name;
130  bool is_sub_del;
131  std::vector<ChunkRange> chunk_del;
132};
133
134
135// SBEntry ---------------------------------------------------------------------
136
137// Holds information about the prefixes for a hostkey.  prefixes can either be
138// 4 bytes (truncated hash) or 32 bytes (full hash).
139// For adds:
140//   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
141// For subs:
142//   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
143//       [add chunk][prefix][add chunk][prefix]
144class SBEntry {
145 public:
146  enum Type {
147    ADD_PREFIX,     // 4 byte add entry.
148    SUB_PREFIX,     // 4 byte sub entry.
149    ADD_FULL_HASH,  // 32 byte add entry.
150    SUB_FULL_HASH,  // 32 byte sub entry.
151  };
152
153  // Creates a SBEntry with the necessary size for the given number of prefixes.
154  // Caller ownes the object and needs to free it by calling Destroy.
155  static SBEntry* Create(Type type, int prefix_count);
156
157  // Frees the entry's memory.
158  void Destroy();
159
160  void set_list_id(int list_id) { data_.list_id = list_id; }
161  int list_id() const { return data_.list_id; }
162  void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
163  int chunk_id() const { return data_.chunk_id; }
164  int prefix_count() const { return data_.prefix_count; }
165
166  // Returns a new entry that is larger by the given number of prefixes, with
167  // all the existing data already copied over.  The old entry is destroyed.
168  SBEntry* Enlarge(int extra_prefixes);
169
170  // Returns true if this is a prefix as opposed to a full hash.
171  bool IsPrefix() const {
172    return type() == ADD_PREFIX || type() == SUB_PREFIX;
173  }
174
175  // Returns true if this is an add entry.
176  bool IsAdd() const {
177    return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
178  }
179
180  // Returns true if this is a sub entry.
181  bool IsSub() const {
182    return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
183  }
184
185  // Helper to return the size of the prefixes.
186  int HashLen() const {
187    return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
188  }
189
190  // For add entries, returns the add chunk id.  For sub entries, returns the
191  // add_chunk id for the prefix at the given index.
192  int ChunkIdAtPrefix(int index) const;
193
194  // Used for sub chunks to set the chunk id at a given index.
195  void SetChunkIdAtPrefix(int index, int chunk_id);
196
197  // Return the prefix/full hash at the given index.  Caller is expected to
198  // call the right function based on the hash length.
199  const SBPrefix& PrefixAt(int index) const;
200  const SBFullHash& FullHashAt(int index) const;
201
202  // Return the prefix/full hash at the given index.  Caller is expected to
203  // call the right function based on the hash length.
204  void SetPrefixAt(int index, const SBPrefix& prefix);
205  void SetFullHashAt(int index, const SBFullHash& full_hash);
206
207 private:
208  // Container for a sub prefix.
209  struct SBSubPrefix {
210    int add_chunk;
211    SBPrefix prefix;
212  };
213
214  // Container for a sub full hash.
215  struct SBSubFullHash {
216    int add_chunk;
217    SBFullHash prefix;
218  };
219
220  // Keep the fixed data together in one struct so that we can get its size
221  // easily.  If any of this is modified, the database will have to be cleared.
222  struct Data {
223    int list_id;
224    // For adds, this is the add chunk number.
225    // For subs: if prefix_count is 0 then this is the add chunk that this sub
226    //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
227    //     or sub_full_hashes is used for each corresponding prefix.
228    int chunk_id;
229    Type type;
230    int prefix_count;
231  };
232
233  SBEntry();
234  ~SBEntry();
235
236  // Helper to return the size of each prefix entry (i.e. for subs this
237  // includes an add chunk id).
238  static int PrefixSize(Type type);
239
240  // Helper to return how much memory a given Entry would require.
241  static int Size(Type type, int prefix_count);
242
243  // Returns how many bytes this entry is.
244  int Size() const;
245
246  Type type() const { return data_.type; }
247
248  void set_prefix_count(int count) { data_.prefix_count = count; }
249  void set_type(Type type) { data_.type = type; }
250
251  // The prefixes union must follow the fixed data so that they're contiguous
252  // in memory.
253  Data data_;
254  union {
255    SBPrefix add_prefixes_[1];
256    SBSubPrefix sub_prefixes_[1];
257    SBFullHash add_full_hashes_[1];
258    SBSubFullHash sub_full_hashes_[1];
259  };
260};
261
262
263// Utility functions -----------------------------------------------------------
264
265namespace safe_browsing_util {
266
267// SafeBrowsing list names.
268extern const char kMalwareList[];
269extern const char kPhishingList[];
270
271// Converts between the SafeBrowsing list names and their enumerated value.
272// If the list names change, both of these methods must be updated.
273enum ListType {
274  INVALID = -1,
275  MALWARE = 0,
276  PHISH = 1,
277};
278int GetListId(const std::string& name);
279std::string GetListName(int list_id);
280
281// Canonicalizes url as per Google Safe Browsing Specification.
282// See section 6.1 in
283// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
284void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
285                     std::string* canonicalized_path,
286                     std::string* canonicalized_query);
287
288// Given a URL, returns all the hosts we need to check.  They are returned
289// in order of size (i.e. b.c is first, then a.b.c).
290void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
291
292// Given a URL, returns all the paths we need to check.
293void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
294
295// Given a URL, compare all the possible host + path full hashes to the set of
296// provided full hashes.  Returns the index of the match if one is found, or -1
297// otherwise.
298int CompareFullHashes(const GURL& url,
299                      const std::vector<SBFullHashResult>& full_hashes);
300
301bool IsPhishingList(const std::string& list_name);
302bool IsMalwareList(const std::string& list_name);
303
304// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
305bool VerifyMAC(const std::string& key,
306               const std::string& mac,
307               const char* data,
308               int data_length);
309
310GURL GeneratePhishingReportUrl(const std::string& report_page,
311                               const std::string& url_to_report);
312
313}  // namespace safe_browsing_util
314
315#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
316