safe_browsing_util.h revision f8ee788a64d60abd8f2d742a5fdedde054ecd910
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Utilities for the SafeBrowsing code.
6
7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
9
10#include <cstring>
11#include <set>
12#include <string>
13#include <vector>
14
15#include "base/basictypes.h"
16#include "base/memory/scoped_ptr.h"
17#include "base/strings/string_piece.h"
18#include "chrome/browser/safe_browsing/chunk_range.h"
19
// Forward declarations.  This header only names these types through
// pointers, references, a scoped_ptr member and a declared return type, so
// their full definitions are not required here.
namespace safe_browsing {
class ChunkData;
}  // namespace safe_browsing

class GURL;
25
26// A truncated hash's type.
27typedef uint32 SBPrefix;
28
// Pairs the URL of a chunk with the name of the list that chunk belongs to.
struct ChunkUrl {
  std::string url;        // URL of the chunk.
  std::string list_name;  // Name of the list the chunk belongs to.
};
34
35// A full hash.
36union SBFullHash {
37  char full_hash[32];
38  SBPrefix prefix;
39};
40
41inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
42  return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
43}
44
45inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
46  return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
47}
48
49// Generate full hash for the given string.
50SBFullHash SBFullHashForString(const base::StringPiece& str);
51
52// Data for an individual chunk sent from the server.
53class SBChunkData {
54 public:
55  SBChunkData();
56  ~SBChunkData();
57
58  // Create with manufactured data, for testing only.
59  // TODO(shess): Right now the test code calling this is in an anonymous
60  // namespace.  Figure out how to shift this into private:.
61  explicit SBChunkData(safe_browsing::ChunkData* chunk_data);
62
63  // Read serialized ChunkData, returning true if the parse suceeded.
64  bool ParseFrom(const unsigned char* data, size_t length);
65
66  // Access the chunk data.  |AddChunkNumberAt()| can only be called if
67  // |IsSub()| returns true.  |Prefix*()| and |FullHash*()| can only be called
68  // if the corrosponding |Is*()| returned true.
69  int ChunkNumber() const;
70  bool IsAdd() const;
71  bool IsSub() const;
72  int AddChunkNumberAt(size_t i) const;
73  bool IsPrefix() const;
74  size_t PrefixCount() const;
75  SBPrefix PrefixAt(size_t i) const;
76  bool IsFullHash() const;
77  size_t FullHashCount() const;
78  SBFullHash FullHashAt(size_t i) const;
79
80 private:
81  // Protocol buffer sent from server.
82  scoped_ptr<safe_browsing::ChunkData> chunk_data_;
83
84  DISALLOW_COPY_AND_ASSIGN(SBChunkData);
85};
86
87// Used when we get a gethash response.
88struct SBFullHashResult {
89  SBFullHash hash;
90  // TODO(shess): Refactor to allow ListType here.
91  int list_id;
92};
93
// Describes one list in the database: its name plus its add and sub chunk
// ranges.
struct SBListChunkRanges {
  // NOTE(review): presumably initializes |name| from |n| -- confirm in the
  // .cc file.
  explicit SBListChunkRanges(const std::string& n);

  std::string name;  // Name of the list.
  std::string adds;  // Ranges covering the add chunks.
  std::string subs;  // Ranges covering the sub chunks.
};
102
103// Container for deleting chunks from the database.
104struct SBChunkDelete {
105  SBChunkDelete();
106  ~SBChunkDelete();
107
108  std::string list_name;
109  bool is_sub_del;
110  std::vector<ChunkRange> chunk_del;
111};
112
// The kinds of threat that SafeBrowsing protects against.
enum SBThreatType {
  // Not a threat.
  SB_THREAT_TYPE_SAFE = 0,

  // The URL is used for phishing.
  SB_THREAT_TYPE_URL_PHISHING = 1,

  // The URL hosts malware.
  SB_THREAT_TYPE_URL_MALWARE = 2,

  // The download URL is malware.
  SB_THREAT_TYPE_BINARY_MALWARE_URL = 3,

  // URL flagged by the client-side phishing model.  Unlike the values above,
  // this one does not correspond to a downloaded list.
  SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL = 4,

  // The Chrome extension or app (identified by its ID) is malware.
  SB_THREAT_TYPE_EXTENSION = 5,

  // URL flagged by the client-side malware IP list, which is part of the
  // client-side detection model.
  SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL = 6,
};
138
139// Utility functions -----------------------------------------------------------
140
141namespace safe_browsing_util {
142
// Names of the SafeBrowsing lists.  Each constant is only declared here; the
// string values live with the definitions.

// Malware and phishing lists.
extern const char kMalwareList[];
extern const char kPhishingList[];

// Binary download list.
extern const char kBinUrlList[];

// Client-side detection whitelist.
extern const char kCsdWhiteList[];

// Download whitelist.
extern const char kDownloadWhiteList[];

// Extension list.
extern const char kExtensionBlacklist[];

// Side-effect free whitelist.
extern const char kSideEffectFreeWhitelist[];

// Client-side detection malware IP blacklist.
extern const char kIPBlacklist[];
158
// Table of Safe Browsing list names.  Every Safe Browsing list must appear in
// it; the declared size is 8 entries.
// NOTE(review): the elements are plain |const char*|, so the table itself is
// writable.  Tightening it to |const char* const| would require changing the
// definition in step with this declaration.
extern const char* kAllLists[8];
161
// Numeric ids for the Safe Browsing lists.  The gaps in the numbering are
// deliberate; each one is explained where it occurs.
enum ListType {
  INVALID = -1,
  MALWARE = 0,
  PHISH = 1,
  BINURL = 2,
  // 3 was BINHASH, which is obsolete.
  CSDWHITELIST = 4,
  // SafeBrowsing lists are stored in pairs, so 5 is kept available for a
  // potential second list that would be stored in the csd-whitelist store
  // file.
  DOWNLOADWHITELIST = 6,
  // 7 is kept available for the same reason as 5.
  EXTENSIONBLACKLIST = 8,
  // 9 is kept available for the same reason as 5.
  SIDEEFFECTFREEWHITELIST = 10,
  // 11 is kept available for the same reason as 5.
  IPBLACKLIST = 12,
  // 13 is kept available for the same reason as 5.
};
181
182// Maps a list name to ListType.
183ListType GetListId(const base::StringPiece& name);
184
185// Maps a ListId to list name. Return false if fails.
186bool GetListName(ListType list_id, std::string* list);
187
188// Canonicalizes url as per Google Safe Browsing Specification.
189// See section 6.1 in
190// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
191void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
192                     std::string* canonicalized_path,
193                     std::string* canonicalized_query);
194
195// Given a URL, returns all the hosts we need to check.  They are returned
196// in order of size (i.e. b.c is first, then a.b.c).
197void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
198
199// Given a URL, returns all the paths we need to check.
200void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
201
202// Given a URL, returns all the patterns we need to check.
203void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
204
205GURL GeneratePhishingReportUrl(const std::string& report_page,
206                               const std::string& url_to_report,
207                               bool is_client_side_detection);
208
209SBFullHash StringToSBFullHash(const std::string& hash_in);
210std::string SBFullHashToString(const SBFullHash& hash_out);
211
212}  // namespace safe_browsing_util
213
214#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
215