// safe_browsing_util.h revision f8ee788a64d60abd8f2d742a5fdedde054ecd910
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Utilities for the SafeBrowsing code. 6 7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10#include <cstring> 11#include <set> 12#include <string> 13#include <vector> 14 15#include "base/basictypes.h" 16#include "base/memory/scoped_ptr.h" 17#include "base/strings/string_piece.h" 18#include "chrome/browser/safe_browsing/chunk_range.h" 19 20namespace safe_browsing { 21class ChunkData; 22}; 23 24class GURL; 25 26// A truncated hash's type. 27typedef uint32 SBPrefix; 28 29// Container for holding a chunk URL and the list it belongs to. 30struct ChunkUrl { 31 std::string url; 32 std::string list_name; 33}; 34 35// A full hash. 36union SBFullHash { 37 char full_hash[32]; 38 SBPrefix prefix; 39}; 40 41inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) { 42 return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)); 43} 44 45inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) { 46 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0; 47} 48 49// Generate full hash for the given string. 50SBFullHash SBFullHashForString(const base::StringPiece& str); 51 52// Data for an individual chunk sent from the server. 53class SBChunkData { 54 public: 55 SBChunkData(); 56 ~SBChunkData(); 57 58 // Create with manufactured data, for testing only. 59 // TODO(shess): Right now the test code calling this is in an anonymous 60 // namespace. Figure out how to shift this into private:. 61 explicit SBChunkData(safe_browsing::ChunkData* chunk_data); 62 63 // Read serialized ChunkData, returning true if the parse suceeded. 64 bool ParseFrom(const unsigned char* data, size_t length); 65 66 // Access the chunk data. |AddChunkNumberAt()| can only be called if 67 // |IsSub()| returns true. 
|Prefix*()| and |FullHash*()| can only be called 68 // if the corrosponding |Is*()| returned true. 69 int ChunkNumber() const; 70 bool IsAdd() const; 71 bool IsSub() const; 72 int AddChunkNumberAt(size_t i) const; 73 bool IsPrefix() const; 74 size_t PrefixCount() const; 75 SBPrefix PrefixAt(size_t i) const; 76 bool IsFullHash() const; 77 size_t FullHashCount() const; 78 SBFullHash FullHashAt(size_t i) const; 79 80 private: 81 // Protocol buffer sent from server. 82 scoped_ptr<safe_browsing::ChunkData> chunk_data_; 83 84 DISALLOW_COPY_AND_ASSIGN(SBChunkData); 85}; 86 87// Used when we get a gethash response. 88struct SBFullHashResult { 89 SBFullHash hash; 90 // TODO(shess): Refactor to allow ListType here. 91 int list_id; 92}; 93 94// Contains information about a list in the database. 95struct SBListChunkRanges { 96 explicit SBListChunkRanges(const std::string& n); 97 98 std::string name; // The list name. 99 std::string adds; // The ranges for add chunks. 100 std::string subs; // The ranges for sub chunks. 101}; 102 103// Container for deleting chunks from the database. 104struct SBChunkDelete { 105 SBChunkDelete(); 106 ~SBChunkDelete(); 107 108 std::string list_name; 109 bool is_sub_del; 110 std::vector<ChunkRange> chunk_del; 111}; 112 113// Different types of threats that SafeBrowsing protects against. 114enum SBThreatType { 115 // No threat at all. 116 SB_THREAT_TYPE_SAFE, 117 118 // The URL is being used for phishing. 119 SB_THREAT_TYPE_URL_PHISHING, 120 121 // The URL hosts malware. 122 SB_THREAT_TYPE_URL_MALWARE, 123 124 // The download URL is malware. 125 SB_THREAT_TYPE_BINARY_MALWARE_URL, 126 127 // Url detected by the client-side phishing model. Note that unlike the 128 // above values, this does not correspond to a downloaded list. 129 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 130 131 // The Chrome extension or app (given by its ID) is malware. 132 SB_THREAT_TYPE_EXTENSION, 133 134 // Url detected by the client-side malware IP list. 
This IP list is part 135 // of the client side detection model. 136 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, 137}; 138 139// Utility functions ----------------------------------------------------------- 140 141namespace safe_browsing_util { 142 143// SafeBrowsing list names. 144extern const char kMalwareList[]; 145extern const char kPhishingList[]; 146// Binary Download list name. 147extern const char kBinUrlList[]; 148// SafeBrowsing client-side detection whitelist list name. 149extern const char kCsdWhiteList[]; 150// SafeBrowsing download whitelist list name. 151extern const char kDownloadWhiteList[]; 152// SafeBrowsing extension list name. 153extern const char kExtensionBlacklist[]; 154// SafeBrowsing side-effect free whitelist name. 155extern const char kSideEffectFreeWhitelist[]; 156// SafeBrowsing csd malware IP blacklist name. 157extern const char kIPBlacklist[]; 158 159// This array must contain all Safe Browsing lists. 160extern const char* kAllLists[8]; 161 162enum ListType { 163 INVALID = -1, 164 MALWARE = 0, 165 PHISH = 1, 166 BINURL = 2, 167 // Obsolete BINHASH = 3, 168 CSDWHITELIST = 4, 169 // SafeBrowsing lists are stored in pairs. Keep ListType 5 170 // available for a potential second list that we would store in the 171 // csd-whitelist store file. 172 DOWNLOADWHITELIST = 6, 173 // See above comment. Leave 7 available. 174 EXTENSIONBLACKLIST = 8, 175 // See above comment. Leave 9 available. 176 SIDEEFFECTFREEWHITELIST = 10, 177 // See above comment. Leave 11 available. 178 IPBLACKLIST = 12, 179 // See above comment. Leave 13 available. 180}; 181 182// Maps a list name to ListType. 183ListType GetListId(const base::StringPiece& name); 184 185// Maps a ListId to list name. Return false if fails. 186bool GetListName(ListType list_id, std::string* list); 187 188// Canonicalizes url as per Google Safe Browsing Specification. 189// See section 6.1 in 190// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 
191void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 192 std::string* canonicalized_path, 193 std::string* canonicalized_query); 194 195// Given a URL, returns all the hosts we need to check. They are returned 196// in order of size (i.e. b.c is first, then a.b.c). 197void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 198 199// Given a URL, returns all the paths we need to check. 200void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 201 202// Given a URL, returns all the patterns we need to check. 203void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 204 205GURL GeneratePhishingReportUrl(const std::string& report_page, 206 const std::string& url_to_report, 207 bool is_client_side_detection); 208 209SBFullHash StringToSBFullHash(const std::string& hash_in); 210std::string SBFullHashToString(const SBFullHash& hash_out); 211 212} // namespace safe_browsing_util 213 214#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 215