safe_browsing_util.h revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Utilities for the SafeBrowsing code. 6 7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10#include <cstring> 11#include <deque> 12#include <set> 13#include <string> 14#include <vector> 15 16#include "base/basictypes.h" 17#include "base/strings/string_piece.h" 18#include "chrome/browser/safe_browsing/chunk_range.h" 19 20class GURL; 21 22class SBEntry; 23 24// A truncated hash's type. 25typedef uint32 SBPrefix; 26 27// Container for holding a chunk URL and the list it belongs to. 28struct ChunkUrl { 29 std::string url; 30 std::string list_name; 31}; 32 33// A full hash. 34union SBFullHash { 35 char full_hash[32]; 36 SBPrefix prefix; 37}; 38 39inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) { 40 return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)); 41} 42 43// Generate full hash for the given string. 44SBFullHash SBFullHashForString(const base::StringPiece& str); 45 46// Container for information about a specific host in an add/sub chunk. 47struct SBChunkHost { 48 SBPrefix host; 49 SBEntry* entry; 50}; 51 52// Container for an add/sub chunk. 53struct SBChunk { 54 SBChunk(); 55 ~SBChunk(); 56 57 int chunk_number; 58 int list_id; 59 bool is_add; 60 std::deque<SBChunkHost> hosts; 61}; 62 63// Container for a set of chunks. Interim wrapper to replace use of 64// |std::deque<SBChunk>| with something having safer memory semantics. 65// management. 66// TODO(shess): |SBEntry| is currently a very roundabout way to hold 67// things pending storage. It could be replaced with the structures 68// used in SafeBrowsingStore, then lots of bridging code could 69// dissappear. 70class SBChunkList { 71 public: 72 SBChunkList(); 73 ~SBChunkList(); 74 75 // Implement that subset of the |std::deque<>| interface which 76 // callers expect. 77 bool empty() const { return chunks_.empty(); } 78 size_t size() { return chunks_.size(); } 79 80 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } 81 SBChunk& back() { return chunks_.back(); } 82 SBChunk& front() { return chunks_.front(); } 83 const SBChunk& front() const { return chunks_.front(); } 84 85 typedef std::vector<SBChunk>::const_iterator const_iterator; 86 const_iterator begin() const { return chunks_.begin(); } 87 const_iterator end() const { return chunks_.end(); } 88 89 typedef std::vector<SBChunk>::iterator iterator; 90 iterator begin() { return chunks_.begin(); } 91 iterator end() { return chunks_.end(); } 92 93 SBChunk& operator[](size_t n) { return chunks_[n]; } 94 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 95 96 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 97 void clear(); 98 99 private: 100 std::vector<SBChunk> chunks_; 101 102 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 103}; 104 105// Used when we get a gethash response. 106struct SBFullHashResult { 107 SBFullHash hash; 108 std::string list_name; 109 int add_chunk_id; 110}; 111 112// Contains information about a list in the database. 113struct SBListChunkRanges { 114 explicit SBListChunkRanges(const std::string& n); 115 116 std::string name; // The list name. 117 std::string adds; // The ranges for add chunks. 118 std::string subs; // The ranges for sub chunks. 119}; 120 121// Container for deleting chunks from the database. 122struct SBChunkDelete { 123 SBChunkDelete(); 124 ~SBChunkDelete(); 125 126 std::string list_name; 127 bool is_sub_del; 128 std::vector<ChunkRange> chunk_del; 129}; 130 131// Different types of threats that SafeBrowsing protects against. 132enum SBThreatType { 133 // No threat at all. 134 SB_THREAT_TYPE_SAFE, 135 136 // The URL is being used for phishing. 137 SB_THREAT_TYPE_URL_PHISHING, 138 139 // The URL hosts malware. 140 SB_THREAT_TYPE_URL_MALWARE, 141 142 // The download URL is malware. 143 SB_THREAT_TYPE_BINARY_MALWARE_URL, 144 145 // Url detected by the client-side phishing model. Note that unlike the 146 // above values, this does not correspond to a downloaded list. 147 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 148 149 // The Chrome extension or app (given by its ID) is malware. 150 SB_THREAT_TYPE_EXTENSION, 151 152 // Url detected by the client-side malware IP list. This IP list is part 153 // of the client side detection model. 154 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, 155}; 156 157// SBEntry --------------------------------------------------------------------- 158 159// Holds information about the prefixes for a hostkey. prefixes can either be 160// 4 bytes (truncated hash) or 32 bytes (full hash). 161// For adds: 162// [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] 163// For subs: 164// [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] 165// [add chunk][prefix][add chunk][prefix] 166class SBEntry { 167 public: 168 enum Type { 169 ADD_PREFIX, // 4 byte add entry. 170 SUB_PREFIX, // 4 byte sub entry. 171 ADD_FULL_HASH, // 32 byte add entry. 172 SUB_FULL_HASH, // 32 byte sub entry. 173 }; 174 175 // Creates a SBEntry with the necessary size for the given number of prefixes. 176 // Caller ownes the object and needs to free it by calling Destroy. 177 static SBEntry* Create(Type type, int prefix_count); 178 179 // Frees the entry's memory. 180 void Destroy(); 181 182 void set_list_id(int list_id) { data_.list_id = list_id; } 183 int list_id() const { return data_.list_id; } 184 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } 185 int chunk_id() const { return data_.chunk_id; } 186 int prefix_count() const { return data_.prefix_count; } 187 188 // Returns true if this is a prefix as opposed to a full hash. 189 bool IsPrefix() const { 190 return type() == ADD_PREFIX || type() == SUB_PREFIX; 191 } 192 193 // Returns true if this is an add entry. 194 bool IsAdd() const { 195 return type() == ADD_PREFIX || type() == ADD_FULL_HASH; 196 } 197 198 // Returns true if this is a sub entry. 199 bool IsSub() const { 200 return type() == SUB_PREFIX || type() == SUB_FULL_HASH; 201 } 202 203 // Helper to return the size of the prefixes. 204 int HashLen() const { 205 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash); 206 } 207 208 // For add entries, returns the add chunk id. For sub entries, returns the 209 // add_chunk id for the prefix at the given index. 210 int ChunkIdAtPrefix(int index) const; 211 212 // Used for sub chunks to set the chunk id at a given index. 213 void SetChunkIdAtPrefix(int index, int chunk_id); 214 215 // Return the prefix/full hash at the given index. Caller is expected to 216 // call the right function based on the hash length. 217 const SBPrefix& PrefixAt(int index) const; 218 const SBFullHash& FullHashAt(int index) const; 219 220 // Return the prefix/full hash at the given index. Caller is expected to 221 // call the right function based on the hash length. 222 void SetPrefixAt(int index, const SBPrefix& prefix); 223 void SetFullHashAt(int index, const SBFullHash& full_hash); 224 225 private: 226 // Container for a sub prefix. 227 struct SBSubPrefix { 228 int add_chunk; 229 SBPrefix prefix; 230 }; 231 232 // Container for a sub full hash. 233 struct SBSubFullHash { 234 int add_chunk; 235 SBFullHash prefix; 236 }; 237 238 // Keep the fixed data together in one struct so that we can get its size 239 // easily. If any of this is modified, the database will have to be cleared. 240 struct Data { 241 int list_id; 242 // For adds, this is the add chunk number. 243 // For subs: if prefix_count is 0 then this is the add chunk that this sub 244 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes 245 // or sub_full_hashes is used for each corresponding prefix. 246 int chunk_id; 247 Type type; 248 int prefix_count; 249 }; 250 251 SBEntry(); 252 ~SBEntry(); 253 254 // Helper to return the size of each prefix entry (i.e. for subs this 255 // includes an add chunk id). 256 static int PrefixSize(Type type); 257 258 // Helper to return how much memory a given Entry would require. 259 static int Size(Type type, int prefix_count); 260 261 // Returns how many bytes this entry is. 262 int Size() const; 263 264 Type type() const { return data_.type; } 265 266 void set_prefix_count(int count) { data_.prefix_count = count; } 267 void set_type(Type type) { data_.type = type; } 268 269 // The prefixes union must follow the fixed data so that they're contiguous 270 // in memory. 271 Data data_; 272 union { 273 SBPrefix add_prefixes_[1]; 274 SBSubPrefix sub_prefixes_[1]; 275 SBFullHash add_full_hashes_[1]; 276 SBSubFullHash sub_full_hashes_[1]; 277 }; 278}; 279 280 281// Utility functions ----------------------------------------------------------- 282 283namespace safe_browsing_util { 284 285// SafeBrowsing list names. 286extern const char kMalwareList[]; 287extern const char kPhishingList[]; 288// Binary Download list name. 289extern const char kBinUrlList[]; 290// SafeBrowsing client-side detection whitelist list name. 291extern const char kCsdWhiteList[]; 292// SafeBrowsing download whitelist list name. 293extern const char kDownloadWhiteList[]; 294// SafeBrowsing extension list name. 295extern const char kExtensionBlacklist[]; 296// SafeBrowsing side-effect free whitelist name. 297extern const char kSideEffectFreeWhitelist[]; 298// SafeBrowsing csd malware IP blacklist name. 299extern const char kIPBlacklist[]; 300 301// This array must contain all Safe Browsing lists. 302extern const char* kAllLists[8]; 303 304enum ListType { 305 INVALID = -1, 306 MALWARE = 0, 307 PHISH = 1, 308 BINURL = 2, 309 // Obsolete BINHASH = 3, 310 CSDWHITELIST = 4, 311 // SafeBrowsing lists are stored in pairs. Keep ListType 5 312 // available for a potential second list that we would store in the 313 // csd-whitelist store file. 314 DOWNLOADWHITELIST = 6, 315 // See above comment. Leave 7 available. 316 EXTENSIONBLACKLIST = 8, 317 // See above comment. Leave 9 available. 318 SIDEEFFECTFREEWHITELIST = 10, 319 // See above comment. Leave 11 available. 320 IPBLACKLIST = 12, 321 // See above comment. Leave 13 available. 322}; 323 324// Maps a list name to ListType. 325ListType GetListId(const std::string& name); 326 327// Maps a ListId to list name. Return false if fails. 328bool GetListName(ListType list_id, std::string* list); 329 330// Canonicalizes url as per Google Safe Browsing Specification. 331// See section 6.1 in 332// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 333void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 334 std::string* canonicalized_path, 335 std::string* canonicalized_query); 336 337// Given a URL, returns all the hosts we need to check. They are returned 338// in order of size (i.e. b.c is first, then a.b.c). 339void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 340 341// Given a URL, returns all the paths we need to check. 342void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 343 344// Given a URL, returns all the patterns we need to check. 345void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 346 347int GetHashIndex(const SBFullHash& hash, 348 const std::vector<SBFullHashResult>& full_hashes); 349 350// Given a URL, compare all the possible host + path full hashes to the set of 351// provided full hashes. Returns the index of the match if one is found, or -1 352// otherwise. 353int GetUrlHashIndex(const GURL& url, 354 const std::vector<SBFullHashResult>& full_hashes); 355 356bool IsPhishingList(const std::string& list_name); 357bool IsMalwareList(const std::string& list_name); 358bool IsBadbinurlList(const std::string& list_name); 359bool IsExtensionList(const std::string& list_name); 360 361GURL GeneratePhishingReportUrl(const std::string& report_page, 362 const std::string& url_to_report, 363 bool is_client_side_detection); 364 365SBFullHash StringToSBFullHash(const std::string& hash_in); 366std::string SBFullHashToString(const SBFullHash& hash_out); 367 368} // namespace safe_browsing_util 369 370#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 371