safe_browsing_util.h revision 0f1bc08d4cfcc34181b0b5cbf065c40f687bf740
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Utilities for the SafeBrowsing code. 6 7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10#include <cstring> 11#include <deque> 12#include <set> 13#include <string> 14#include <vector> 15 16#include "base/basictypes.h" 17#include "chrome/browser/safe_browsing/chunk_range.h" 18 19class GURL; 20 21class SBEntry; 22 23// A truncated hash's type. 24typedef int32 SBPrefix; 25 26// Container for holding a chunk URL and the list it belongs to. 27struct ChunkUrl { 28 std::string url; 29 std::string list_name; 30}; 31 32// A full hash. 33union SBFullHash { 34 char full_hash[32]; 35 SBPrefix prefix; 36}; 37 38inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) { 39 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0; 40} 41 42inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) { 43 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0; 44} 45 46// Container for information about a specific host in an add/sub chunk. 47struct SBChunkHost { 48 SBPrefix host; 49 SBEntry* entry; 50}; 51 52// Container for an add/sub chunk. 53struct SBChunk { 54 SBChunk(); 55 ~SBChunk(); 56 57 int chunk_number; 58 int list_id; 59 bool is_add; 60 std::deque<SBChunkHost> hosts; 61}; 62 63// Container for a set of chunks. Interim wrapper to replace use of 64// |std::deque<SBChunk>| with something having safer memory semantics. 65// management. 66// TODO(shess): |SBEntry| is currently a very roundabout way to hold 67// things pending storage. It could be replaced with the structures 68// used in SafeBrowsingStore, then lots of bridging code could 69// dissappear. 70class SBChunkList { 71 public: 72 SBChunkList(); 73 ~SBChunkList(); 74 75 // Implement that subset of the |std::deque<>| interface which 76 // callers expect. 77 bool empty() const { return chunks_.empty(); } 78 size_t size() { return chunks_.size(); } 79 80 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } 81 SBChunk& back() { return chunks_.back(); } 82 SBChunk& front() { return chunks_.front(); } 83 const SBChunk& front() const { return chunks_.front(); } 84 85 typedef std::vector<SBChunk>::const_iterator const_iterator; 86 const_iterator begin() const { return chunks_.begin(); } 87 const_iterator end() const { return chunks_.end(); } 88 89 typedef std::vector<SBChunk>::iterator iterator; 90 iterator begin() { return chunks_.begin(); } 91 iterator end() { return chunks_.end(); } 92 93 SBChunk& operator[](size_t n) { return chunks_[n]; } 94 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 95 96 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 97 void clear(); 98 99 private: 100 std::vector<SBChunk> chunks_; 101 102 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 103}; 104 105// Used when we get a gethash response. 106struct SBFullHashResult { 107 SBFullHash hash; 108 std::string list_name; 109 int add_chunk_id; 110}; 111 112// Contains information about a list in the database. 113struct SBListChunkRanges { 114 explicit SBListChunkRanges(const std::string& n); 115 116 std::string name; // The list name. 117 std::string adds; // The ranges for add chunks. 118 std::string subs; // The ranges for sub chunks. 119}; 120 121// Container for deleting chunks from the database. 122struct SBChunkDelete { 123 SBChunkDelete(); 124 ~SBChunkDelete(); 125 126 std::string list_name; 127 bool is_sub_del; 128 std::vector<ChunkRange> chunk_del; 129}; 130 131// Different types of threats that SafeBrowsing protects against. 132enum SBThreatType { 133 // No threat at all. 134 SB_THREAT_TYPE_SAFE, 135 136 // The URL is being used for phishing. 137 SB_THREAT_TYPE_URL_PHISHING, 138 139 // The URL hosts malware. 140 SB_THREAT_TYPE_URL_MALWARE, 141 142 // The download URL is malware. 143 SB_THREAT_TYPE_BINARY_MALWARE_URL, 144 145 // The hash of the download contents is malware. 146 SB_THREAT_TYPE_BINARY_MALWARE_HASH, 147 148 // Url detected by the client-side phishing model. Note that unlike the 149 // above values, this does not correspond to a downloaded list. 150 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 151 152 // The Chrome extension or app (given by its ID) is malware. 153 SB_THREAT_TYPE_EXTENSION, 154 155 // Url detected by the client-side malware IP list. This IP list is part 156 // of the client side detection model. 157 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, 158}; 159 160// SBEntry --------------------------------------------------------------------- 161 162// Holds information about the prefixes for a hostkey. prefixes can either be 163// 4 bytes (truncated hash) or 32 bytes (full hash). 164// For adds: 165// [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] 166// For subs: 167// [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] 168// [add chunk][prefix][add chunk][prefix] 169class SBEntry { 170 public: 171 enum Type { 172 ADD_PREFIX, // 4 byte add entry. 173 SUB_PREFIX, // 4 byte sub entry. 174 ADD_FULL_HASH, // 32 byte add entry. 175 SUB_FULL_HASH, // 32 byte sub entry. 176 }; 177 178 // Creates a SBEntry with the necessary size for the given number of prefixes. 179 // Caller ownes the object and needs to free it by calling Destroy. 180 static SBEntry* Create(Type type, int prefix_count); 181 182 // Frees the entry's memory. 183 void Destroy(); 184 185 void set_list_id(int list_id) { data_.list_id = list_id; } 186 int list_id() const { return data_.list_id; } 187 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } 188 int chunk_id() const { return data_.chunk_id; } 189 int prefix_count() const { return data_.prefix_count; } 190 191 // Returns true if this is a prefix as opposed to a full hash. 192 bool IsPrefix() const { 193 return type() == ADD_PREFIX || type() == SUB_PREFIX; 194 } 195 196 // Returns true if this is an add entry. 197 bool IsAdd() const { 198 return type() == ADD_PREFIX || type() == ADD_FULL_HASH; 199 } 200 201 // Returns true if this is a sub entry. 202 bool IsSub() const { 203 return type() == SUB_PREFIX || type() == SUB_FULL_HASH; 204 } 205 206 // Helper to return the size of the prefixes. 207 int HashLen() const { 208 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash); 209 } 210 211 // For add entries, returns the add chunk id. For sub entries, returns the 212 // add_chunk id for the prefix at the given index. 213 int ChunkIdAtPrefix(int index) const; 214 215 // Used for sub chunks to set the chunk id at a given index. 216 void SetChunkIdAtPrefix(int index, int chunk_id); 217 218 // Return the prefix/full hash at the given index. Caller is expected to 219 // call the right function based on the hash length. 220 const SBPrefix& PrefixAt(int index) const; 221 const SBFullHash& FullHashAt(int index) const; 222 223 // Return the prefix/full hash at the given index. Caller is expected to 224 // call the right function based on the hash length. 225 void SetPrefixAt(int index, const SBPrefix& prefix); 226 void SetFullHashAt(int index, const SBFullHash& full_hash); 227 228 private: 229 // Container for a sub prefix. 230 struct SBSubPrefix { 231 int add_chunk; 232 SBPrefix prefix; 233 }; 234 235 // Container for a sub full hash. 236 struct SBSubFullHash { 237 int add_chunk; 238 SBFullHash prefix; 239 }; 240 241 // Keep the fixed data together in one struct so that we can get its size 242 // easily. If any of this is modified, the database will have to be cleared. 243 struct Data { 244 int list_id; 245 // For adds, this is the add chunk number. 246 // For subs: if prefix_count is 0 then this is the add chunk that this sub 247 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes 248 // or sub_full_hashes is used for each corresponding prefix. 249 int chunk_id; 250 Type type; 251 int prefix_count; 252 }; 253 254 SBEntry(); 255 ~SBEntry(); 256 257 // Helper to return the size of each prefix entry (i.e. for subs this 258 // includes an add chunk id). 259 static int PrefixSize(Type type); 260 261 // Helper to return how much memory a given Entry would require. 262 static int Size(Type type, int prefix_count); 263 264 // Returns how many bytes this entry is. 265 int Size() const; 266 267 Type type() const { return data_.type; } 268 269 void set_prefix_count(int count) { data_.prefix_count = count; } 270 void set_type(Type type) { data_.type = type; } 271 272 // The prefixes union must follow the fixed data so that they're contiguous 273 // in memory. 274 Data data_; 275 union { 276 SBPrefix add_prefixes_[1]; 277 SBSubPrefix sub_prefixes_[1]; 278 SBFullHash add_full_hashes_[1]; 279 SBSubFullHash sub_full_hashes_[1]; 280 }; 281}; 282 283 284// Utility functions ----------------------------------------------------------- 285 286namespace safe_browsing_util { 287 288// SafeBrowsing list names. 289extern const char kMalwareList[]; 290extern const char kPhishingList[]; 291// Binary Download list names. 292extern const char kBinUrlList[]; 293extern const char kBinHashList[]; 294// SafeBrowsing client-side detection whitelist list name. 295extern const char kCsdWhiteList[]; 296// SafeBrowsing download whitelist list name. 297extern const char kDownloadWhiteList[]; 298// SafeBrowsing extension list name. 299extern const char kExtensionBlacklist[]; 300// SafeBrowsing side-effect free whitelist name. 301extern const char kSideEffectFreeWhitelist[]; 302// SafeBrowsing csd malware IP blacklist name. 303extern const char kIPBlacklist[]; 304 305enum ListType { 306 INVALID = -1, 307 MALWARE = 0, 308 PHISH = 1, 309 BINURL = 2, 310 BINHASH = 3, 311 CSDWHITELIST = 4, 312 // SafeBrowsing lists are stored in pairs. Keep ListType 5 313 // available for a potential second list that we would store in the 314 // csd-whitelist store file. 315 DOWNLOADWHITELIST = 6, 316 // See above comment. Leave 7 available. 317 EXTENSIONBLACKLIST = 8, 318 // See above comment. Leave 9 available. 319 SIDEEFFECTFREEWHITELIST = 10, 320 // See above comment. Leave 11 available. 321 IPBLACKLIST = 12, 322 // See above comment. Leave 13 available. 323}; 324 325// Maps a list name to ListType. 326ListType GetListId(const std::string& name); 327 328// Maps a ListId to list name. Return false if fails. 329bool GetListName(ListType list_id, std::string* list); 330 331// Canonicalizes url as per Google Safe Browsing Specification. 332// See section 6.1 in 333// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 334void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 335 std::string* canonicalized_path, 336 std::string* canonicalized_query); 337 338// Given a URL, returns all the hosts we need to check. They are returned 339// in order of size (i.e. b.c is first, then a.b.c). 340void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 341 342// Given a URL, returns all the paths we need to check. 343void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 344 345// Given a URL, returns all the patterns we need to check. 346void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 347 348int GetHashIndex(const SBFullHash& hash, 349 const std::vector<SBFullHashResult>& full_hashes); 350 351// Given a URL, compare all the possible host + path full hashes to the set of 352// provided full hashes. Returns the index of the match if one is found, or -1 353// otherwise. 354int GetUrlHashIndex(const GURL& url, 355 const std::vector<SBFullHashResult>& full_hashes); 356 357bool IsPhishingList(const std::string& list_name); 358bool IsMalwareList(const std::string& list_name); 359bool IsBadbinurlList(const std::string& list_name); 360bool IsBadbinhashList(const std::string& list_name); 361bool IsExtensionList(const std::string& list_name); 362 363GURL GeneratePhishingReportUrl(const std::string& report_page, 364 const std::string& url_to_report, 365 bool is_client_side_detection); 366 367SBFullHash StringToSBFullHash(const std::string& hash_in); 368std::string SBFullHashToString(const SBFullHash& hash_out); 369 370} // namespace safe_browsing_util 371 372#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 373