// safe_browsing_util.h revision dc0f95d653279beabeb9817299e2902918ba123e
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Utilities for the SafeBrowsing code. 6 7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9#pragma once 10 11#include <cstring> 12#include <deque> 13#include <string> 14#include <vector> 15 16#include "base/basictypes.h" 17#include "chrome/browser/safe_browsing/chunk_range.h" 18 19class GURL; 20 21class SBEntry; 22 23// A truncated hash's type. 24typedef int SBPrefix; 25 26// Container for holding a chunk URL and the MAC of the contents of the URL. 27struct ChunkUrl { 28 std::string url; 29 std::string mac; 30 std::string list_name; 31}; 32 33// A full hash. 34union SBFullHash { 35 char full_hash[32]; 36 SBPrefix prefix; 37}; 38 39inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) { 40 return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0; 41} 42 43// Container for information about a specific host in an add/sub chunk. 44struct SBChunkHost { 45 SBPrefix host; 46 SBEntry* entry; 47}; 48 49// Container for an add/sub chunk. 50struct SBChunk { 51 SBChunk(); 52 ~SBChunk(); 53 54 int chunk_number; 55 int list_id; 56 bool is_add; 57 std::deque<SBChunkHost> hosts; 58}; 59 60// Container for a set of chunks. Interim wrapper to replace use of 61// |std::deque<SBChunk>| with something having safer memory semantics. 62// management. 63// TODO(shess): |SBEntry| is currently a very roundabout way to hold 64// things pending storage. It could be replaced with the structures 65// used in SafeBrowsingStore, then lots of bridging code could 66// dissappear. 67class SBChunkList { 68 public: 69 SBChunkList(); 70 ~SBChunkList(); 71 72 // Implement that subset of the |std::deque<>| interface which 73 // callers expect. 
74 bool empty() const { return chunks_.empty(); } 75 size_t size() { return chunks_.size(); } 76 77 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } 78 SBChunk& back() { return chunks_.back(); } 79 SBChunk& front() { return chunks_.front(); } 80 const SBChunk& front() const { return chunks_.front(); } 81 82 typedef std::vector<SBChunk>::const_iterator const_iterator; 83 const_iterator begin() const { return chunks_.begin(); } 84 const_iterator end() const { return chunks_.end(); } 85 86 typedef std::vector<SBChunk>::iterator iterator; 87 iterator begin() { return chunks_.begin(); } 88 iterator end() { return chunks_.end(); } 89 90 SBChunk& operator[](size_t n) { return chunks_[n]; } 91 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 92 93 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 94 void clear(); 95 96 private: 97 std::vector<SBChunk> chunks_; 98 99 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 100}; 101 102// Used when we get a gethash response. 103struct SBFullHashResult { 104 SBFullHash hash; 105 std::string list_name; 106 int add_chunk_id; 107}; 108 109// Contains information about a list in the database. 110struct SBListChunkRanges { 111 explicit SBListChunkRanges(const std::string& n); 112 113 std::string name; // The list name. 114 std::string adds; // The ranges for add chunks. 115 std::string subs; // The ranges for sub chunks. 116}; 117 118// Container for deleting chunks from the database. 119struct SBChunkDelete { 120 SBChunkDelete(); 121 ~SBChunkDelete(); 122 123 std::string list_name; 124 bool is_sub_del; 125 std::vector<ChunkRange> chunk_del; 126}; 127 128 129// SBEntry --------------------------------------------------------------------- 130 131// Holds information about the prefixes for a hostkey. prefixes can either be 132// 4 bytes (truncated hash) or 32 bytes (full hash). 
133// For adds: 134// [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] 135// For subs: 136// [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] 137// [add chunk][prefix][add chunk][prefix] 138class SBEntry { 139 public: 140 enum Type { 141 ADD_PREFIX, // 4 byte add entry. 142 SUB_PREFIX, // 4 byte sub entry. 143 ADD_FULL_HASH, // 32 byte add entry. 144 SUB_FULL_HASH, // 32 byte sub entry. 145 }; 146 147 // Creates a SBEntry with the necessary size for the given number of prefixes. 148 // Caller ownes the object and needs to free it by calling Destroy. 149 static SBEntry* Create(Type type, int prefix_count); 150 151 // Frees the entry's memory. 152 void Destroy(); 153 154 void set_list_id(int list_id) { data_.list_id = list_id; } 155 int list_id() const { return data_.list_id; } 156 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } 157 int chunk_id() const { return data_.chunk_id; } 158 int prefix_count() const { return data_.prefix_count; } 159 160 // Returns true if this is a prefix as opposed to a full hash. 161 bool IsPrefix() const { 162 return type() == ADD_PREFIX || type() == SUB_PREFIX; 163 } 164 165 // Returns true if this is an add entry. 166 bool IsAdd() const { 167 return type() == ADD_PREFIX || type() == ADD_FULL_HASH; 168 } 169 170 // Returns true if this is a sub entry. 171 bool IsSub() const { 172 return type() == SUB_PREFIX || type() == SUB_FULL_HASH; 173 } 174 175 // Helper to return the size of the prefixes. 176 int HashLen() const { 177 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash); 178 } 179 180 // For add entries, returns the add chunk id. For sub entries, returns the 181 // add_chunk id for the prefix at the given index. 182 int ChunkIdAtPrefix(int index) const; 183 184 // Used for sub chunks to set the chunk id at a given index. 185 void SetChunkIdAtPrefix(int index, int chunk_id); 186 187 // Return the prefix/full hash at the given index. 
Caller is expected to 188 // call the right function based on the hash length. 189 const SBPrefix& PrefixAt(int index) const; 190 const SBFullHash& FullHashAt(int index) const; 191 192 // Return the prefix/full hash at the given index. Caller is expected to 193 // call the right function based on the hash length. 194 void SetPrefixAt(int index, const SBPrefix& prefix); 195 void SetFullHashAt(int index, const SBFullHash& full_hash); 196 197 private: 198 // Container for a sub prefix. 199 struct SBSubPrefix { 200 int add_chunk; 201 SBPrefix prefix; 202 }; 203 204 // Container for a sub full hash. 205 struct SBSubFullHash { 206 int add_chunk; 207 SBFullHash prefix; 208 }; 209 210 // Keep the fixed data together in one struct so that we can get its size 211 // easily. If any of this is modified, the database will have to be cleared. 212 struct Data { 213 int list_id; 214 // For adds, this is the add chunk number. 215 // For subs: if prefix_count is 0 then this is the add chunk that this sub 216 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes 217 // or sub_full_hashes is used for each corresponding prefix. 218 int chunk_id; 219 Type type; 220 int prefix_count; 221 }; 222 223 SBEntry(); 224 ~SBEntry(); 225 226 // Helper to return the size of each prefix entry (i.e. for subs this 227 // includes an add chunk id). 228 static int PrefixSize(Type type); 229 230 // Helper to return how much memory a given Entry would require. 231 static int Size(Type type, int prefix_count); 232 233 // Returns how many bytes this entry is. 234 int Size() const; 235 236 Type type() const { return data_.type; } 237 238 void set_prefix_count(int count) { data_.prefix_count = count; } 239 void set_type(Type type) { data_.type = type; } 240 241 // The prefixes union must follow the fixed data so that they're contiguous 242 // in memory. 
243 Data data_; 244 union { 245 SBPrefix add_prefixes_[1]; 246 SBSubPrefix sub_prefixes_[1]; 247 SBFullHash add_full_hashes_[1]; 248 SBSubFullHash sub_full_hashes_[1]; 249 }; 250}; 251 252 253// Utility functions ----------------------------------------------------------- 254 255namespace safe_browsing_util { 256 257// SafeBrowsing list names. 258extern const char kMalwareList[]; 259extern const char kPhishingList[]; 260// Binary Download list names. 261extern const char kBinUrlList[]; 262extern const char kBinHashList[]; 263 264enum ListType { 265 INVALID = -1, 266 MALWARE = 0, 267 PHISH = 1, 268 BINURL = 2, 269 BINHASH = 3, 270}; 271 272// Maps a list name to ListType. 273int GetListId(const std::string& name); 274// Maps a ListId to list name. Return false if fails. 275bool GetListName(int list_id, std::string* list); 276 277 278// Canonicalizes url as per Google Safe Browsing Specification. 279// See section 6.1 in 280// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 281void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 282 std::string* canonicalized_path, 283 std::string* canonicalized_query); 284 285// Given a URL, returns all the hosts we need to check. They are returned 286// in order of size (i.e. b.c is first, then a.b.c). 287void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 288 289// Given a URL, returns all the paths we need to check. 290void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 291 292int GetHashIndex(const SBFullHash& hash, 293 const std::vector<SBFullHashResult>& full_hashes); 294 295// Given a URL, compare all the possible host + path full hashes to the set of 296// provided full hashes. Returns the index of the match if one is found, or -1 297// otherwise. 
298int GetUrlHashIndex(const GURL& url, 299 const std::vector<SBFullHashResult>& full_hashes); 300 301bool IsPhishingList(const std::string& list_name); 302bool IsMalwareList(const std::string& list_name); 303bool IsBadbinurlList(const std::string& list_name); 304bool IsBadbinhashList(const std::string& list_name); 305 306// Returns 'true' if 'mac' can be verified using 'key' and 'data'. 307bool VerifyMAC(const std::string& key, 308 const std::string& mac, 309 const char* data, 310 int data_length); 311 312GURL GeneratePhishingReportUrl(const std::string& report_page, 313 const std::string& url_to_report); 314 315void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out); 316 317} // namespace safe_browsing_util 318 319#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 320