safe_browsing_util.h revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Utilities for the SafeBrowsing code. 6 7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9#pragma once 10 11#include <cstring> 12#include <deque> 13#include <string> 14#include <vector> 15 16#include "base/basictypes.h" 17#include "chrome/browser/safe_browsing/chunk_range.h" 18 19class GURL; 20 21class SBEntry; 22 23// A truncated hash's type. 24typedef int SBPrefix; 25 26// Container for holding a chunk URL and the MAC of the contents of the URL. 27struct ChunkUrl { 28 std::string url; 29 std::string mac; 30 std::string list_name; 31}; 32 33// A full hash. 34union SBFullHash { 35 char full_hash[32]; 36 SBPrefix prefix; 37}; 38 39inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) { 40 return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0; 41} 42 43// Container for information about a specific host in an add/sub chunk. 44struct SBChunkHost { 45 SBPrefix host; 46 SBEntry* entry; 47}; 48 49// Container for an add/sub chunk. 50struct SBChunk { 51 SBChunk(); 52 ~SBChunk(); 53 54 int chunk_number; 55 int list_id; 56 bool is_add; 57 std::deque<SBChunkHost> hosts; 58}; 59 60// Container for a set of chunks. Interim wrapper to replace use of 61// |std::deque<SBChunk>| with something having safer memory semantics. 62// management. 63// TODO(shess): |SBEntry| is currently a very roundabout way to hold 64// things pending storage. It could be replaced with the structures 65// used in SafeBrowsingStore, then lots of bridging code could 66// dissappear. 67class SBChunkList { 68 public: 69 SBChunkList(); 70 ~SBChunkList(); 71 72 // Implement that subset of the |std::deque<>| interface which 73 // callers expect. 74 bool empty() const { return chunks_.empty(); } 75 size_t size() { return chunks_.size(); } 76 77 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } 78 SBChunk& back() { return chunks_.back(); } 79 SBChunk& front() { return chunks_.front(); } 80 const SBChunk& front() const { return chunks_.front(); } 81 82 typedef std::vector<SBChunk>::const_iterator const_iterator; 83 const_iterator begin() const { return chunks_.begin(); } 84 const_iterator end() const { return chunks_.end(); } 85 86 typedef std::vector<SBChunk>::iterator iterator; 87 iterator begin() { return chunks_.begin(); } 88 iterator end() { return chunks_.end(); } 89 90 SBChunk& operator[](size_t n) { return chunks_[n]; } 91 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 92 93 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 94 void clear(); 95 96 private: 97 std::vector<SBChunk> chunks_; 98 99 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 100}; 101 102// Used when we get a gethash response. 103struct SBFullHashResult { 104 SBFullHash hash; 105 std::string list_name; 106 int add_chunk_id; 107}; 108 109// Contains information about a list in the database. 110struct SBListChunkRanges { 111 explicit SBListChunkRanges(const std::string& n); 112 113 std::string name; // The list name. 114 std::string adds; // The ranges for add chunks. 115 std::string subs; // The ranges for sub chunks. 116}; 117 118// Container for deleting chunks from the database. 119struct SBChunkDelete { 120 SBChunkDelete(); 121 ~SBChunkDelete(); 122 123 std::string list_name; 124 bool is_sub_del; 125 std::vector<ChunkRange> chunk_del; 126}; 127 128 129// SBEntry --------------------------------------------------------------------- 130 131// Holds information about the prefixes for a hostkey. prefixes can either be 132// 4 bytes (truncated hash) or 32 bytes (full hash). 133// For adds: 134// [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] 135// For subs: 136// [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] 137// [add chunk][prefix][add chunk][prefix] 138class SBEntry { 139 public: 140 enum Type { 141 ADD_PREFIX, // 4 byte add entry. 142 SUB_PREFIX, // 4 byte sub entry. 143 ADD_FULL_HASH, // 32 byte add entry. 144 SUB_FULL_HASH, // 32 byte sub entry. 145 }; 146 147 // Creates a SBEntry with the necessary size for the given number of prefixes. 148 // Caller ownes the object and needs to free it by calling Destroy. 149 static SBEntry* Create(Type type, int prefix_count); 150 151 // Frees the entry's memory. 152 void Destroy(); 153 154 void set_list_id(int list_id) { data_.list_id = list_id; } 155 int list_id() const { return data_.list_id; } 156 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } 157 int chunk_id() const { return data_.chunk_id; } 158 int prefix_count() const { return data_.prefix_count; } 159 160 // Returns a new entry that is larger by the given number of prefixes, with 161 // all the existing data already copied over. The old entry is destroyed. 162 SBEntry* Enlarge(int extra_prefixes); 163 164 // Returns true if this is a prefix as opposed to a full hash. 165 bool IsPrefix() const { 166 return type() == ADD_PREFIX || type() == SUB_PREFIX; 167 } 168 169 // Returns true if this is an add entry. 170 bool IsAdd() const { 171 return type() == ADD_PREFIX || type() == ADD_FULL_HASH; 172 } 173 174 // Returns true if this is a sub entry. 175 bool IsSub() const { 176 return type() == SUB_PREFIX || type() == SUB_FULL_HASH; 177 } 178 179 // Helper to return the size of the prefixes. 180 int HashLen() const { 181 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash); 182 } 183 184 // For add entries, returns the add chunk id. For sub entries, returns the 185 // add_chunk id for the prefix at the given index. 186 int ChunkIdAtPrefix(int index) const; 187 188 // Used for sub chunks to set the chunk id at a given index. 189 void SetChunkIdAtPrefix(int index, int chunk_id); 190 191 // Return the prefix/full hash at the given index. Caller is expected to 192 // call the right function based on the hash length. 193 const SBPrefix& PrefixAt(int index) const; 194 const SBFullHash& FullHashAt(int index) const; 195 196 // Return the prefix/full hash at the given index. Caller is expected to 197 // call the right function based on the hash length. 198 void SetPrefixAt(int index, const SBPrefix& prefix); 199 void SetFullHashAt(int index, const SBFullHash& full_hash); 200 201 private: 202 // Container for a sub prefix. 203 struct SBSubPrefix { 204 int add_chunk; 205 SBPrefix prefix; 206 }; 207 208 // Container for a sub full hash. 209 struct SBSubFullHash { 210 int add_chunk; 211 SBFullHash prefix; 212 }; 213 214 // Keep the fixed data together in one struct so that we can get its size 215 // easily. If any of this is modified, the database will have to be cleared. 216 struct Data { 217 int list_id; 218 // For adds, this is the add chunk number. 219 // For subs: if prefix_count is 0 then this is the add chunk that this sub 220 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes 221 // or sub_full_hashes is used for each corresponding prefix. 222 int chunk_id; 223 Type type; 224 int prefix_count; 225 }; 226 227 SBEntry(); 228 ~SBEntry(); 229 230 // Helper to return the size of each prefix entry (i.e. for subs this 231 // includes an add chunk id). 232 static int PrefixSize(Type type); 233 234 // Helper to return how much memory a given Entry would require. 235 static int Size(Type type, int prefix_count); 236 237 // Returns how many bytes this entry is. 238 int Size() const; 239 240 Type type() const { return data_.type; } 241 242 void set_prefix_count(int count) { data_.prefix_count = count; } 243 void set_type(Type type) { data_.type = type; } 244 245 // The prefixes union must follow the fixed data so that they're contiguous 246 // in memory. 247 Data data_; 248 union { 249 SBPrefix add_prefixes_[1]; 250 SBSubPrefix sub_prefixes_[1]; 251 SBFullHash add_full_hashes_[1]; 252 SBSubFullHash sub_full_hashes_[1]; 253 }; 254}; 255 256 257// Utility functions ----------------------------------------------------------- 258 259namespace safe_browsing_util { 260 261// SafeBrowsing list names. 262extern const char kMalwareList[]; 263extern const char kPhishingList[]; 264 265// Converts between the SafeBrowsing list names and their enumerated value. 266// If the list names change, both of these methods must be updated. 267enum ListType { 268 INVALID = -1, 269 MALWARE = 0, 270 PHISH = 1, 271}; 272int GetListId(const std::string& name); 273std::string GetListName(int list_id); 274 275// Canonicalizes url as per Google Safe Browsing Specification. 276// See section 6.1 in 277// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 278void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 279 std::string* canonicalized_path, 280 std::string* canonicalized_query); 281 282// Given a URL, returns all the hosts we need to check. They are returned 283// in order of size (i.e. b.c is first, then a.b.c). 284void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 285 286// Given a URL, returns all the paths we need to check. 287void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 288 289// Given a URL, compare all the possible host + path full hashes to the set of 290// provided full hashes. Returns the index of the match if one is found, or -1 291// otherwise. 292int CompareFullHashes(const GURL& url, 293 const std::vector<SBFullHashResult>& full_hashes); 294 295bool IsPhishingList(const std::string& list_name); 296bool IsMalwareList(const std::string& list_name); 297 298// Returns 'true' if 'mac' can be verified using 'key' and 'data'. 299bool VerifyMAC(const std::string& key, 300 const std::string& mac, 301 const char* data, 302 int data_length); 303 304GURL GeneratePhishingReportUrl(const std::string& report_page, 305 const std::string& url_to_report); 306 307} // namespace safe_browsing_util 308 309#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 310