1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Utilities for the SafeBrowsing code. 6 7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10#include <cstring> 11#include <set> 12#include <string> 13#include <vector> 14 15#include "base/basictypes.h" 16#include "base/memory/scoped_ptr.h" 17#include "base/strings/string_piece.h" 18#include "base/time/time.h" 19#include "chrome/browser/safe_browsing/chunk_range.h" 20 21namespace safe_browsing { 22class ChunkData; 23}; 24 25class GURL; 26 27// A truncated hash's type. 28typedef uint32 SBPrefix; 29 30// Container for holding a chunk URL and the list it belongs to. 31struct ChunkUrl { 32 std::string url; 33 std::string list_name; 34}; 35 36// A full hash. 37union SBFullHash { 38 char full_hash[32]; 39 SBPrefix prefix; 40}; 41 42inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) { 43 return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)); 44} 45 46inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) { 47 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0; 48} 49 50// Generate full hash for the given string. 51SBFullHash SBFullHashForString(const base::StringPiece& str); 52 53// Data for an individual chunk sent from the server. 54class SBChunkData { 55 public: 56 SBChunkData(); 57 ~SBChunkData(); 58 59 // Create with manufactured data, for testing only. 60 // TODO(shess): Right now the test code calling this is in an anonymous 61 // namespace. Figure out how to shift this into private:. 62 explicit SBChunkData(safe_browsing::ChunkData* chunk_data); 63 64 // Read serialized ChunkData, returning true if the parse suceeded. 65 bool ParseFrom(const unsigned char* data, size_t length); 66 67 // Access the chunk data. |AddChunkNumberAt()| can only be called if 68 // |IsSub()| returns true. |Prefix*()| and |FullHash*()| can only be called 69 // if the corrosponding |Is*()| returned true. 70 int ChunkNumber() const; 71 bool IsAdd() const; 72 bool IsSub() const; 73 int AddChunkNumberAt(size_t i) const; 74 bool IsPrefix() const; 75 size_t PrefixCount() const; 76 SBPrefix PrefixAt(size_t i) const; 77 bool IsFullHash() const; 78 size_t FullHashCount() const; 79 SBFullHash FullHashAt(size_t i) const; 80 81 private: 82 // Protocol buffer sent from server. 83 scoped_ptr<safe_browsing::ChunkData> chunk_data_; 84 85 DISALLOW_COPY_AND_ASSIGN(SBChunkData); 86}; 87 88// Used when we get a gethash response. 89struct SBFullHashResult { 90 SBFullHash hash; 91 // TODO(shess): Refactor to allow ListType here. 92 int list_id; 93 std::string metadata; 94}; 95 96// Caches individual response from GETHASH request. 97struct SBCachedFullHashResult { 98 SBCachedFullHashResult(); 99 explicit SBCachedFullHashResult(const base::Time& in_expire_after); 100 ~SBCachedFullHashResult(); 101 102 base::Time expire_after; 103 std::vector<SBFullHashResult> full_hashes; 104}; 105 106// Contains information about a list in the database. 107struct SBListChunkRanges { 108 explicit SBListChunkRanges(const std::string& n); 109 110 std::string name; // The list name. 111 std::string adds; // The ranges for add chunks. 112 std::string subs; // The ranges for sub chunks. 113}; 114 115// Container for deleting chunks from the database. 116struct SBChunkDelete { 117 SBChunkDelete(); 118 ~SBChunkDelete(); 119 120 std::string list_name; 121 bool is_sub_del; 122 std::vector<ChunkRange> chunk_del; 123}; 124 125// Different types of threats that SafeBrowsing protects against. 126enum SBThreatType { 127 // No threat at all. 128 SB_THREAT_TYPE_SAFE, 129 130 // The URL is being used for phishing. 131 SB_THREAT_TYPE_URL_PHISHING, 132 133 // The URL hosts malware. 134 SB_THREAT_TYPE_URL_MALWARE, 135 136 // The URL hosts harmful programs. 137 SB_THREAT_TYPE_URL_HARMFUL, 138 139 // The download URL is malware. 140 SB_THREAT_TYPE_BINARY_MALWARE_URL, 141 142 // Url detected by the client-side phishing model. Note that unlike the 143 // above values, this does not correspond to a downloaded list. 144 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 145 146 // The Chrome extension or app (given by its ID) is malware. 147 SB_THREAT_TYPE_EXTENSION, 148 149 // Url detected by the client-side malware IP list. This IP list is part 150 // of the client side detection model. 151 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, 152}; 153 154// Utility functions ----------------------------------------------------------- 155 156namespace safe_browsing_util { 157 158// SafeBrowsing list names. 159extern const char kMalwareList[]; 160extern const char kPhishingList[]; 161// Binary Download list name. 162extern const char kBinUrlList[]; 163// SafeBrowsing client-side detection whitelist list name. 164extern const char kCsdWhiteList[]; 165// SafeBrowsing download whitelist list name. 166extern const char kDownloadWhiteList[]; 167// SafeBrowsing extension list name. 168extern const char kExtensionBlacklist[]; 169// SafeBrowsing side-effect free whitelist name. 170extern const char kSideEffectFreeWhitelist[]; 171// SafeBrowsing csd malware IP blacklist name. 172extern const char kIPBlacklist[]; 173 174// This array must contain all Safe Browsing lists. 175extern const char* kAllLists[8]; 176 177enum ListType { 178 INVALID = -1, 179 MALWARE = 0, 180 PHISH = 1, 181 BINURL = 2, 182 // Obsolete BINHASH = 3, 183 CSDWHITELIST = 4, 184 // SafeBrowsing lists are stored in pairs. Keep ListType 5 185 // available for a potential second list that we would store in the 186 // csd-whitelist store file. 187 DOWNLOADWHITELIST = 6, 188 // See above comment. Leave 7 available. 189 EXTENSIONBLACKLIST = 8, 190 // See above comment. Leave 9 available. 191 SIDEEFFECTFREEWHITELIST = 10, 192 // See above comment. Leave 11 available. 193 IPBLACKLIST = 12, 194 // See above comment. Leave 13 available. 195}; 196 197// Maps a list name to ListType. 198ListType GetListId(const base::StringPiece& name); 199 200// Maps a ListId to list name. Return false if fails. 201bool GetListName(ListType list_id, std::string* list); 202 203// Canonicalizes url as per Google Safe Browsing Specification. 204// See section 6.1 in 205// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 206void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 207 std::string* canonicalized_path, 208 std::string* canonicalized_query); 209 210// Given a URL, returns all the hosts we need to check. They are returned 211// in order of size (i.e. b.c is first, then a.b.c). 212void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 213 214// Given a URL, returns all the paths we need to check. 215void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 216 217// Given a URL, returns all the patterns we need to check. 218void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 219 220GURL GeneratePhishingReportUrl(const std::string& report_page, 221 const std::string& url_to_report, 222 bool is_client_side_detection); 223 224SBFullHash StringToSBFullHash(const std::string& hash_in); 225std::string SBFullHashToString(const SBFullHash& hash_out); 226 227} // namespace safe_browsing_util 228 229#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 230