safe_browsing_util.h revision 731df977c0511bca2206b5f333555b1205ff1f43
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Utilities for the SafeBrowsing code. 6 7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9#pragma once 10 11#include <cstring> 12#include <deque> 13#include <string> 14#include <vector> 15 16#include "base/basictypes.h" 17#include "chrome/browser/safe_browsing/chunk_range.h" 18 19class GURL; 20 21#ifdef SB_LOGGING_ENABLED 22#define SB_DLOG(severity) DLOG_IF(INFO, 1) 23#else 24#define SB_DLOG(severity) DLOG_IF(INFO, 0) 25#endif 26 27class SBEntry; 28 29// A truncated hash's type. 30typedef int SBPrefix; 31 32// Container for holding a chunk URL and the MAC of the contents of the URL. 33struct ChunkUrl { 34 std::string url; 35 std::string mac; 36 std::string list_name; 37}; 38 39// A full hash. 40union SBFullHash { 41 char full_hash[32]; 42 SBPrefix prefix; 43}; 44 45inline bool operator==(const SBFullHash& rhash, const SBFullHash& lhash) { 46 return memcmp(rhash.full_hash, lhash.full_hash, sizeof(SBFullHash)) == 0; 47} 48 49// Container for information about a specific host in an add/sub chunk. 50struct SBChunkHost { 51 SBPrefix host; 52 SBEntry* entry; 53}; 54 55// Container for an add/sub chunk. 56struct SBChunk { 57 SBChunk(); 58 ~SBChunk(); 59 60 int chunk_number; 61 int list_id; 62 bool is_add; 63 std::deque<SBChunkHost> hosts; 64}; 65 66// Container for a set of chunks. Interim wrapper to replace use of 67// |std::deque<SBChunk>| with something having safer memory semantics. 68// management. 69// TODO(shess): |SBEntry| is currently a very roundabout way to hold 70// things pending storage. It could be replaced with the structures 71// used in SafeBrowsingStore, then lots of bridging code could 72// dissappear. 
73class SBChunkList { 74 public: 75 SBChunkList(); 76 ~SBChunkList(); 77 78 // Implement that subset of the |std::deque<>| interface which 79 // callers expect. 80 bool empty() const { return chunks_.empty(); } 81 size_t size() { return chunks_.size(); } 82 83 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } 84 SBChunk& back() { return chunks_.back(); } 85 SBChunk& front() { return chunks_.front(); } 86 const SBChunk& front() const { return chunks_.front(); } 87 88 typedef std::vector<SBChunk>::const_iterator const_iterator; 89 const_iterator begin() const { return chunks_.begin(); } 90 const_iterator end() const { return chunks_.end(); } 91 92 typedef std::vector<SBChunk>::iterator iterator; 93 iterator begin() { return chunks_.begin(); } 94 iterator end() { return chunks_.end(); } 95 96 SBChunk& operator[](size_t n) { return chunks_[n]; } 97 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 98 99 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 100 void clear(); 101 102 private: 103 std::vector<SBChunk> chunks_; 104 105 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 106}; 107 108// Used when we get a gethash response. 109struct SBFullHashResult { 110 SBFullHash hash; 111 std::string list_name; 112 int add_chunk_id; 113}; 114 115// Contains information about a list in the database. 116struct SBListChunkRanges { 117 explicit SBListChunkRanges(const std::string& n); 118 119 std::string name; // The list name. 120 std::string adds; // The ranges for add chunks. 121 std::string subs; // The ranges for sub chunks. 122}; 123 124// Container for deleting chunks from the database. 125struct SBChunkDelete { 126 SBChunkDelete(); 127 ~SBChunkDelete(); 128 129 std::string list_name; 130 bool is_sub_del; 131 std::vector<ChunkRange> chunk_del; 132}; 133 134 135// SBEntry --------------------------------------------------------------------- 136 137// Holds information about the prefixes for a hostkey. 
// Prefixes can either be 4 bytes (truncated hash) or 32 bytes (full hash).
// Layout of an entry:
// For adds:
//   [list id   ][chunk id][prefix count (0..n)][prefix1][prefix2]
// For subs:
//   [list id   ][chunk id (only used if prefix count is 0)][prefix count (0..n)]
//       [add chunk][prefix][add chunk][prefix]
class SBEntry {
 public:
  // Kind of entry: add vs. sub, and prefix vs. full hash.
  enum Type {
    ADD_PREFIX,     // 4 byte add entry.
    SUB_PREFIX,     // 4 byte sub entry.
    ADD_FULL_HASH,  // 32 byte add entry.
    SUB_FULL_HASH,  // 32 byte sub entry.
  };

  // Creates a SBEntry with the necessary size for the given number of prefixes.
  // Caller owns the object and needs to free it by calling Destroy.
  static SBEntry* Create(Type type, int prefix_count);

  // Frees the entry's memory.  The constructor and destructor are private, so
  // Create/Destroy are the only way for outside code to make or release an
  // entry.
  void Destroy();

  // Accessors for the fixed-size header fields stored in |data_|.
  void set_list_id(int list_id) { data_.list_id = list_id; }
  int list_id() const { return data_.list_id; }
  void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
  int chunk_id() const { return data_.chunk_id; }
  int prefix_count() const { return data_.prefix_count; }

  // Returns a new entry that is larger by the given number of prefixes, with
  // all the existing data already copied over.  The old entry is destroyed.
  SBEntry* Enlarge(int extra_prefixes);

  // Returns true if this is a prefix as opposed to a full hash.
  bool IsPrefix() const {
    return type() == ADD_PREFIX || type() == SUB_PREFIX;
  }

  // Returns true if this is an add entry.
  bool IsAdd() const {
    return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
  }

  // Returns true if this is a sub entry.
  bool IsSub() const {
    return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
  }

  // Helper to return the size of the prefixes: sizeof(SBPrefix) for prefix
  // entries, sizeof(SBFullHash) otherwise.  The add chunk id stored with sub
  // prefixes is not included here.
  int HashLen() const {
    return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
  }

  // For add entries, returns the add chunk id.  For sub entries, returns the
  // add_chunk id for the prefix at the given index.
  int ChunkIdAtPrefix(int index) const;

  // Used for sub chunks to set the chunk id at a given index.
  void SetChunkIdAtPrefix(int index, int chunk_id);

  // Return the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  const SBPrefix& PrefixAt(int index) const;
  const SBFullHash& FullHashAt(int index) const;

  // Set the prefix/full hash at the given index.  Caller is expected to
  // call the right function based on the hash length.
  void SetPrefixAt(int index, const SBPrefix& prefix);
  void SetFullHashAt(int index, const SBFullHash& full_hash);

 private:
  // Container for a sub prefix.
  struct SBSubPrefix {
    int add_chunk;
    SBPrefix prefix;
  };

  // Container for a sub full hash.
  struct SBSubFullHash {
    int add_chunk;
    SBFullHash prefix;
  };

  // Keep the fixed data together in one struct so that we can get its size
  // easily.  If any of this is modified, the database will have to be cleared.
  struct Data {
    int list_id;
    // For adds, this is the add chunk number.
    // For subs: if prefix_count is 0 then this is the add chunk that this sub
    //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
    //     or sub_full_hashes is used for each corresponding prefix.
    int chunk_id;
    Type type;
    int prefix_count;
  };

  // Private: entries are made with Create() and released with Destroy().
  SBEntry();
  ~SBEntry();

  // Helper to return the size of each prefix entry (i.e. for subs this
  // includes an add chunk id).
  static int PrefixSize(Type type);

  // Helper to return how much memory a given Entry would require.
  static int Size(Type type, int prefix_count);

  // Returns how many bytes this entry is.
  int Size() const;

  Type type() const { return data_.type; }

  void set_prefix_count(int count) { data_.prefix_count = count; }
  void set_type(Type type) { data_.type = type; }

  // The prefixes union must follow the fixed data so that they're contiguous
  // in memory.
  // NOTE(review): each array is declared with a single element while Create()
  // sizes the entry for |prefix_count| prefixes, so the storage presumably
  // extends past the declared bounds -- confirm in the implementation.
  Data data_;
  union {
    SBPrefix add_prefixes_[1];
    SBSubPrefix sub_prefixes_[1];
    SBFullHash add_full_hashes_[1];
    SBSubFullHash sub_full_hashes_[1];
  };
};


// Utility functions -----------------------------------------------------------

namespace safe_browsing_util {

// SafeBrowsing list names.
extern const char kMalwareList[];
extern const char kPhishingList[];

// Converts between the SafeBrowsing list names and their enumerated value.
// If the list names change, both of these methods must be updated.
enum ListType {
  INVALID = -1,
  MALWARE = 0,
  PHISH = 1,
};
int GetListId(const std::string& name);
std::string GetListName(int list_id);

// Canonicalizes url as per Google Safe Browsing Specification.
// See section 6.1 in
// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
// The results are written to the three output strings.
void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
                     std::string* canonicalized_path,
                     std::string* canonicalized_query);

// Given a URL, returns all the hosts we need to check.  They are returned
// in order of size (i.e. b.c is first, then a.b.c).
void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);

// Given a URL, returns all the paths we need to check.
void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);

// Given a URL, compare all the possible host + path full hashes to the set of
// provided full hashes.  Returns the index of the match if one is found, or -1
// otherwise.
int CompareFullHashes(const GURL& url,
                      const std::vector<SBFullHashResult>& full_hashes);

// NOTE(review): presumably report whether |list_name| names the phishing /
// malware list (see kPhishingList / kMalwareList) -- implementation not
// visible here.
bool IsPhishingList(const std::string& list_name);
bool IsMalwareList(const std::string& list_name);

// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
// 'data_length' is presumably the number of bytes at 'data' -- confirm.
bool VerifyMAC(const std::string& key,
               const std::string& mac,
               const char* data,
               int data_length);

// NOTE(review): presumably builds the URL used to report |url_to_report| as
// phishing via |report_page| -- implementation not visible here.
GURL GeneratePhishingReportUrl(const std::string& report_page,
                               const std::string& url_to_report);

}  // namespace safe_browsing_util

#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_