1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Utilities for the SafeBrowsing code.
6
7#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
9
10#include <cstring>
11#include <set>
12#include <string>
13#include <vector>
14
15#include "base/basictypes.h"
16#include "base/memory/scoped_ptr.h"
17#include "base/strings/string_piece.h"
18#include "base/time/time.h"
19#include "chrome/browser/safe_browsing/chunk_range.h"
20
21namespace safe_browsing {
22class ChunkData;
23};
24
25class GURL;
26
27// A truncated hash's type.
28typedef uint32 SBPrefix;
29
30// Container for holding a chunk URL and the list it belongs to.
31struct ChunkUrl {
32  std::string url;
33  std::string list_name;
34};
35
36// A full hash.
37union SBFullHash {
38  char full_hash[32];
39  SBPrefix prefix;
40};
41
42inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
43  return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
44}
45
46inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
47  return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
48}
49
50// Generate full hash for the given string.
51SBFullHash SBFullHashForString(const base::StringPiece& str);
52
53// Data for an individual chunk sent from the server.
54class SBChunkData {
55 public:
56  SBChunkData();
57  ~SBChunkData();
58
59  // Create with manufactured data, for testing only.
60  // TODO(shess): Right now the test code calling this is in an anonymous
61  // namespace.  Figure out how to shift this into private:.
62  explicit SBChunkData(safe_browsing::ChunkData* chunk_data);
63
64  // Read serialized ChunkData, returning true if the parse suceeded.
65  bool ParseFrom(const unsigned char* data, size_t length);
66
67  // Access the chunk data.  |AddChunkNumberAt()| can only be called if
68  // |IsSub()| returns true.  |Prefix*()| and |FullHash*()| can only be called
69  // if the corrosponding |Is*()| returned true.
70  int ChunkNumber() const;
71  bool IsAdd() const;
72  bool IsSub() const;
73  int AddChunkNumberAt(size_t i) const;
74  bool IsPrefix() const;
75  size_t PrefixCount() const;
76  SBPrefix PrefixAt(size_t i) const;
77  bool IsFullHash() const;
78  size_t FullHashCount() const;
79  SBFullHash FullHashAt(size_t i) const;
80
81 private:
82  // Protocol buffer sent from server.
83  scoped_ptr<safe_browsing::ChunkData> chunk_data_;
84
85  DISALLOW_COPY_AND_ASSIGN(SBChunkData);
86};
87
88// Used when we get a gethash response.
89struct SBFullHashResult {
90  SBFullHash hash;
91  // TODO(shess): Refactor to allow ListType here.
92  int list_id;
93  std::string metadata;
94};
95
96// Caches individual response from GETHASH request.
97struct SBCachedFullHashResult {
98  SBCachedFullHashResult();
99  explicit SBCachedFullHashResult(const base::Time& in_expire_after);
100  ~SBCachedFullHashResult();
101
102  base::Time expire_after;
103  std::vector<SBFullHashResult> full_hashes;
104};
105
106// Contains information about a list in the database.
107struct SBListChunkRanges {
108  explicit SBListChunkRanges(const std::string& n);
109
110  std::string name;  // The list name.
111  std::string adds;  // The ranges for add chunks.
112  std::string subs;  // The ranges for sub chunks.
113};
114
115// Container for deleting chunks from the database.
116struct SBChunkDelete {
117  SBChunkDelete();
118  ~SBChunkDelete();
119
120  std::string list_name;
121  bool is_sub_del;
122  std::vector<ChunkRange> chunk_del;
123};
124
125// Different types of threats that SafeBrowsing protects against.
126enum SBThreatType {
127  // No threat at all.
128  SB_THREAT_TYPE_SAFE,
129
130  // The URL is being used for phishing.
131  SB_THREAT_TYPE_URL_PHISHING,
132
133  // The URL hosts malware.
134  SB_THREAT_TYPE_URL_MALWARE,
135
136  // The URL hosts harmful programs.
137  SB_THREAT_TYPE_URL_HARMFUL,
138
139  // The download URL is malware.
140  SB_THREAT_TYPE_BINARY_MALWARE_URL,
141
142  // Url detected by the client-side phishing model.  Note that unlike the
143  // above values, this does not correspond to a downloaded list.
144  SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,
145
146  // The Chrome extension or app (given by its ID) is malware.
147  SB_THREAT_TYPE_EXTENSION,
148
149  // Url detected by the client-side malware IP list. This IP list is part
150  // of the client side detection model.
151  SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
152};
153
154// Utility functions -----------------------------------------------------------
155
156namespace safe_browsing_util {
157
158// SafeBrowsing list names.
159extern const char kMalwareList[];
160extern const char kPhishingList[];
161// Binary Download list name.
162extern const char kBinUrlList[];
163// SafeBrowsing client-side detection whitelist list name.
164extern const char kCsdWhiteList[];
165// SafeBrowsing download whitelist list name.
166extern const char kDownloadWhiteList[];
167// SafeBrowsing extension list name.
168extern const char kExtensionBlacklist[];
169// SafeBrowsing side-effect free whitelist name.
170extern const char kSideEffectFreeWhitelist[];
171// SafeBrowsing csd malware IP blacklist name.
172extern const char kIPBlacklist[];
173
174// This array must contain all Safe Browsing lists.
175extern const char* kAllLists[8];
176
177enum ListType {
178  INVALID = -1,
179  MALWARE = 0,
180  PHISH = 1,
181  BINURL = 2,
182  // Obsolete BINHASH = 3,
183  CSDWHITELIST = 4,
184  // SafeBrowsing lists are stored in pairs.  Keep ListType 5
185  // available for a potential second list that we would store in the
186  // csd-whitelist store file.
187  DOWNLOADWHITELIST = 6,
188  // See above comment. Leave 7 available.
189  EXTENSIONBLACKLIST = 8,
190  // See above comment. Leave 9 available.
191  SIDEEFFECTFREEWHITELIST = 10,
192  // See above comment. Leave 11 available.
193  IPBLACKLIST = 12,
194  // See above comment.  Leave 13 available.
195};
196
197// Maps a list name to ListType.
198ListType GetListId(const base::StringPiece& name);
199
200// Maps a ListId to list name. Return false if fails.
201bool GetListName(ListType list_id, std::string* list);
202
203// Canonicalizes url as per Google Safe Browsing Specification.
204// See section 6.1 in
205// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
206void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
207                     std::string* canonicalized_path,
208                     std::string* canonicalized_query);
209
210// Given a URL, returns all the hosts we need to check.  They are returned
211// in order of size (i.e. b.c is first, then a.b.c).
212void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
213
214// Given a URL, returns all the paths we need to check.
215void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
216
217// Given a URL, returns all the patterns we need to check.
218void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
219
220GURL GeneratePhishingReportUrl(const std::string& report_page,
221                               const std::string& url_to_report,
222                               bool is_client_side_detection);
223
224SBFullHash StringToSBFullHash(const std::string& hash_in);
225std::string SBFullHashToString(const SBFullHash& hash_out);
226
227}  // namespace safe_browsing_util
228
229#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
230