// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_util.h"

#include <stdlib.h>

#include "base/base64.h"
#include "base/string_util.h"
#include "chrome/browser/google/google_util.h"
#include "crypto/hmac.h"
#include "crypto/sha2.h"
#include "googleurl/src/gurl.h"
#include "googleurl/src/url_util.h"
#include "net/base/escape.h"
#include "unicode/locid.h"

#if defined(OS_WIN)
#include "chrome/installer/util/browser_distribution.h"
#endif

// Size of a Safe Browsing MAC digest.  NOTE(review): presumably in bytes (20
// would match a SHA-1 based HMAC); the code using it is outside this section.
static const int kSafeBrowsingMacDigestSize = 20;

// Continue to this URL after submitting the phishing report form.  The single
// %s is a printf-style placeholder; judging by the "intl/%s" path segment it
// is presumably filled with a locale name -- confirm against the caller.
// TODO(paulg): Change to a Chrome specific URL.
static const char kContinueUrlFormat[] =
  "http://www.google.com/tools/firefox/toolbar/FT2/intl/%s/submit_success.html";

// printf-style query string with three %s placeholders, in order: "tpl",
// "continue" and "url".
static const char kReportParams[] = "?tpl=%s&continue=%s&url=%s";

30731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// SBChunk ---------------------------------------------------------------------
31731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
32731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunk::SBChunk()
33731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    : chunk_number(0),
34731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      list_id(0),
35731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      is_add(false) {
36731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
37731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
38731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunk::~SBChunk() {}
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// SBChunkList -----------------------------------------------------------------
41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
42731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkList::SBChunkList() {}
43731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
44731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkList::~SBChunkList() {
45731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  clear();
46731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
47731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBChunkList::clear() {
49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (std::vector<SBChunk>::iterator citer = chunks_.begin();
50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       citer != chunks_.end(); ++citer) {
51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    for (std::deque<SBChunkHost>::iterator hiter = citer->hosts.begin();
52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch         hiter != citer->hosts.end(); ++hiter) {
53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (hiter->entry) {
54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        hiter->entry->Destroy();
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        hiter->entry = NULL;
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      }
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  chunks_.clear();
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
62731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// SBListChunkRanges -----------------------------------------------------------
63731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
64731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBListChunkRanges::SBListChunkRanges(const std::string& n) : name(n) {}
65731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
66731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// SBChunkDelete ---------------------------------------------------------------
67731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
68731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkDelete::SBChunkDelete() : is_sub_del(false) {}
69731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
70731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkDelete::~SBChunkDelete() {}
71731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// SBEntry ---------------------------------------------------------------------
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochSBEntry* SBEntry::Create(Type type, int prefix_count) {
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int size = Size(type, prefix_count);
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  SBEntry *rv = static_cast<SBEntry*>(malloc(size));
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  memset(rv, 0, size);
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  rv->set_type(type);
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  rv->set_prefix_count(prefix_count);
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return rv;
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::Destroy() {
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  free(this);
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::PrefixSize(Type type) {
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  switch (type) {
91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    case ADD_PREFIX:
92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return sizeof(SBPrefix);
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    case ADD_FULL_HASH:
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return sizeof(SBFullHash);
95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    case SUB_PREFIX:
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return sizeof(SBSubPrefix);
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    case SUB_FULL_HASH:
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return sizeof(SBSubFullHash);
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    default:
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      NOTREACHED();
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return 0;
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::Size() const {
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return Size(type(), prefix_count());
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::Size(Type type, int prefix_count) {
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return sizeof(Data) + prefix_count * PrefixSize(type);
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::ChunkIdAtPrefix(int index) const {
115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (type() == SUB_PREFIX)
116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return sub_prefixes_[index].add_chunk;
117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return (type() == SUB_FULL_HASH) ?
118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      sub_full_hashes_[index].add_chunk : chunk_id();
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::SetChunkIdAtPrefix(int index, int chunk_id) {
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(IsSub());
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (type() == SUB_PREFIX)
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    sub_prefixes_[index].add_chunk = chunk_id;
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  else
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    sub_full_hashes_[index].add_chunk = chunk_id;
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst SBPrefix& SBEntry::PrefixAt(int index) const {
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(IsPrefix());
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return IsAdd() ? add_prefixes_[index] : sub_prefixes_[index].prefix;
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst SBFullHash& SBEntry::FullHashAt(int index) const {
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(!IsPrefix());
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return IsAdd() ? add_full_hashes_[index] : sub_full_hashes_[index].prefix;
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::SetPrefixAt(int index, const SBPrefix& prefix) {
143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(IsPrefix());
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (IsAdd())
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    add_prefixes_[index] = prefix;
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  else
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    sub_prefixes_[index].prefix = prefix;
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::SetFullHashAt(int index, const SBFullHash& full_hash) {
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(!IsPrefix());
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (IsAdd())
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    add_full_hashes_[index] = full_hash;
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  else
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    sub_full_hashes_[index].prefix = full_hash;
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
// Utility functions -----------------------------------------------------------

namespace safe_browsing_util {

// Names of the lists the browser can process.  GetListId() and GetListName()
// translate between these names and their numeric list ids.
const char kMalwareList[] = "goog-malware-shavar";
const char kPhishingList[] = "goog-phish-shavar";
const char kBinUrlList[] = "goog-badbinurl-shavar";
const char kBinHashList[] = "goog-badbin-digestvar";
const char kCsdWhiteList[] = "goog-csdwhite-sha256";

172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint GetListId(const std::string& name) {
17321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  int id;
17421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  if (name == safe_browsing_util::kMalwareList) {
17521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    id = MALWARE;
17621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  } else if (name == safe_browsing_util::kPhishingList) {
17721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    id = PHISH;
17821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  } else if (name == safe_browsing_util::kBinUrlList) {
17921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    id = BINURL;
180ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  } else if (name == safe_browsing_util::kBinHashList) {
18121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    id = BINHASH;
182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  } else if (name == safe_browsing_util::kCsdWhiteList) {
183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    id = CSDWHITELIST;
18421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  } else {
18521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    id = INVALID;
18621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  }
18721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  return id;
188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
19021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool GetListName(int list_id, std::string* list) {
19121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  switch (list_id) {
19221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case MALWARE:
19321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      *list = safe_browsing_util::kMalwareList;
19421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break;
19521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case PHISH:
19621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      *list = safe_browsing_util::kPhishingList;
19721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break;
19821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case BINURL:
19921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      *list = safe_browsing_util::kBinUrlList;
20021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break;
20121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case BINHASH:
20221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      *list = safe_browsing_util::kBinHashList;
20321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break;
204ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    case CSDWHITELIST:
205ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      *list = safe_browsing_util::kCsdWhiteList;
206ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      break;
20721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    default:
20821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      return false;
20921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  }
21021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  return true;
211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::string Unescape(const std::string& url) {
214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string unescaped_str(url);
215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string old_unescaped_str;
216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const int kMaxLoopIterations = 1024;
217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int loop_var = 0;
218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  do {
219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    old_unescaped_str = unescaped_str;
220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    unescaped_str = UnescapeURLComponent(old_unescaped_str,
221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        UnescapeRule::CONTROL_CHARS | UnescapeRule::SPACES |
222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        UnescapeRule::URL_SPECIAL_CHARS);
223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } while (unescaped_str != old_unescaped_str && ++loop_var <=
224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           kMaxLoopIterations);
225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return unescaped_str;
227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
// Percent-escapes every byte of |url| that is <= ' ' (0x20), > '~' (0x7E),
// '#' or '%', and copies all other bytes through unchanged.  Escapes are
// written as '%' followed by two uppercase hex digits.
std::string Escape(const std::string& url) {
  static const char kHexString[] = "0123456789ABCDEF";
  std::string escaped_str;
  // The result is never shorter than the input.
  escaped_str.reserve(url.length());
  for (size_t i = 0; i < url.length(); ++i) {
    // Work on the unsigned value so bytes >= 0x80 compare and shift sanely.
    const unsigned char c = static_cast<unsigned char>(url[i]);
    if (c <= ' ' || c > '~' || c == '#' || c == '%') {
      escaped_str.push_back('%');
      escaped_str.push_back(kHexString[c >> 4]);
      escaped_str.push_back(kHexString[c & 0xf]);
    } else {
      escaped_str.push_back(static_cast<char>(c));
    }
  }

  return escaped_str;
}

// Returns a copy of |str| in which every run of two or more consecutive |c|
// characters is collapsed to a single |c|.  All other characters, including
// repeats of characters other than |c|, are copied unchanged.  Runs in a
// single pass over the input.
std::string RemoveConsecutiveChars(const std::string& str, const char c) {
  std::string output;
  output.reserve(str.length());
  for (std::string::size_type i = 0; i < str.length(); ++i) {
    // Drop |c| when it merely extends a run already represented in |output|.
    if (str[i] == c && !output.empty() && output[output.length() - 1] == c)
      continue;
    output.push_back(str[i]);
  }

  return output;
}

258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Canonicalizes url as per Google Safe Browsing Specification.
259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// See section 6.1 in
260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid CanonicalizeUrl(const GURL& url,
262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                     std::string* canonicalized_hostname,
263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                     std::string* canonicalized_path,
264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                     std::string* canonicalized_query) {
265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(url.is_valid());
266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // We only canonicalize "normal" URLs.
268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!url.IsStandard())
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Following canonicalization steps are excluded since url parsing takes care
272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // of those :-
273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 1. Remove any tab (0x09), CR (0x0d), and LF (0x0a) chars from url.
274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //    (Exclude escaped version of these chars).
275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 2. Normalize hostname to 4 dot-seperated decimal values.
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 3. Lowercase hostname.
277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 4. Resolve path sequences "/../" and "/./".
278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // That leaves us with the following :-
280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 1. Remove fragment in URL.
281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GURL url_without_fragment;
282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GURL::Replacements f_replacements;
283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  f_replacements.ClearRef();
284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  f_replacements.ClearUsername();
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  f_replacements.ClearPassword();
286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_without_fragment = url.ReplaceComponents(f_replacements);
287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 2. Do URL unescaping until no more hex encoded characters exist.
289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string url_unescaped_str(Unescape(url_without_fragment.spec()));
290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_parse::Parsed parsed;
291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_parse::ParseStandardURL(url_unescaped_str.data(),
292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      url_unescaped_str.length(), &parsed);
293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 3. In hostname, remove all leading and trailing dots.
295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const std::string host = (parsed.host.len > 0) ? url_unescaped_str.substr(
296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      parsed.host.begin, parsed.host.len) : "";
297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const char kCharsToTrim[] = ".";
298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string host_without_end_dots;
299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  TrimString(host, kCharsToTrim, &host_without_end_dots);
300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 4. In hostname, replace consecutive dots with a single dot.
302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string host_without_consecutive_dots(RemoveConsecutiveChars(
303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      host_without_end_dots, '.'));
304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 5. In path, replace runs of consecutive slashes with a single slash.
306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string path = (parsed.path.len > 0) ? url_unescaped_str.substr(
307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       parsed.path.begin, parsed.path.len): "";
308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string path_without_consecutive_slash(RemoveConsecutiveChars(
309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      path, '/'));
310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_canon::Replacements<char> hp_replacements;
312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  hp_replacements.SetHost(host_without_consecutive_dots.data(),
313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_parse::Component(0, host_without_consecutive_dots.length()));
314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  hp_replacements.SetPath(path_without_consecutive_slash.data(),
315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_parse::Component(0, path_without_consecutive_slash.length()));
316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string url_unescaped_with_can_hostpath;
318c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_canon::StdStringCanonOutput output(&url_unescaped_with_can_hostpath);
319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_parse::Parsed temp_parsed;
320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_util::ReplaceComponents(url_unescaped_str.data(),
321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                              url_unescaped_str.length(), parsed,
322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                              hp_replacements, NULL, &output, &temp_parsed);
323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  output.Complete();
324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
325c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 6. Step needed to revert escaping done in url_util::ReplaceComponents.
326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_unescaped_with_can_hostpath = Unescape(url_unescaped_with_can_hostpath);
327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
328c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // 7. After performing all above steps, percent-escape all chars in url which
329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // are <= ASCII 32, >= 127, #, %. Escapes must be uppercase hex characters.
330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string escaped_canon_url_str(Escape(url_unescaped_with_can_hostpath));
331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_parse::Parsed final_parsed;
332c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_parse::ParseStandardURL(escaped_canon_url_str.data(),
333c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                              escaped_canon_url_str.length(), &final_parsed);
334c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (canonicalized_hostname && final_parsed.host.len > 0) {
336c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    *canonicalized_hostname =
337c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        escaped_canon_url_str.substr(final_parsed.host.begin,
338c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                     final_parsed.host.len);
339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
340c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (canonicalized_path && final_parsed.path.len > 0) {
341c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    *canonicalized_path = escaped_canon_url_str.substr(final_parsed.path.begin,
342c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                                       final_parsed.path.len);
343c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
344c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (canonicalized_query && final_parsed.query.len > 0) {
345c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    *canonicalized_query = escaped_canon_url_str.substr(
346c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        final_parsed.query.begin, final_parsed.query.len);
347c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
348c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
349c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
350c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts) {
351c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  hosts->clear();
352c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
353c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string canon_host;
354c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  CanonicalizeUrl(url, &canon_host, NULL, NULL);
355c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
356c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const std::string host = canon_host;  // const sidesteps GCC bugs below!
357c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (host.empty())
358c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
359c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
360c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Per the Safe Browsing Protocol v2 spec, we try the host, and also up to 4
361c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // hostnames formed by starting with the last 5 components and successively
362c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // removing the leading component.  The last component isn't examined alone,
363c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // since it's the TLD or a subcomponent thereof.
364c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
365c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Note that we don't need to be clever about stopping at the "real" eTLD --
366c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the data on the server side has been filtered to ensure it will not
367c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // blacklist a whole TLD, and it's not significantly slower on our side to
368c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // just check too much.
369c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
370c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Also note that because we have a simple blacklist, not some sort of complex
371c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // whitelist-in-blacklist or vice versa, it doesn't matter what order we check
372c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // these in.
373c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const size_t kMaxHostsToCheck = 4;
374c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bool skipped_last_component = false;
375c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (std::string::const_reverse_iterator i(host.rbegin());
376c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       i != host.rend() && hosts->size() < kMaxHostsToCheck; ++i) {
377c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (*i == '.') {
378c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (skipped_last_component)
379c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        hosts->push_back(std::string(i.base(), host.end()));
380c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      else
381c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        skipped_last_component = true;
382c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
383c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
384c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  hosts->push_back(host);
385c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
386c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
387c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) {
388c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  paths->clear();
389c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
390c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string canon_path;
391c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string canon_query;
392c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  CanonicalizeUrl(url, NULL, &canon_path, &canon_query);
393c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
394c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const std::string path = canon_path;   // const sidesteps GCC bugs below!
395c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const std::string query = canon_query;
396c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (path.empty())
397c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
398c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
399c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Per the Safe Browsing Protocol v2 spec, we try the exact path with/without
400c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the query parameters, and also up to 4 paths formed by starting at the root
401c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // and adding more path components.
402c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
403c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // As with the hosts above, it doesn't matter what order we check these in.
404c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const size_t kMaxPathsToCheck = 4;
405c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (std::string::const_iterator i(path.begin());
406c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       i != path.end() && paths->size() < kMaxPathsToCheck; ++i) {
407c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (*i == '/')
408c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      paths->push_back(std::string(path.begin(), i + 1));
409c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
410c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
411c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!paths->empty() && paths->back() != path)
412c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    paths->push_back(path);
413c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
414c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!query.empty())
415c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    paths->push_back(path + "?" + query);
416c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
417c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
418dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenint GetHashIndex(const SBFullHash& hash,
419dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                 const std::vector<SBFullHashResult>& full_hashes) {
420dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (size_t i = 0; i < full_hashes.size(); ++i) {
421dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (hash == full_hashes[i].hash)
422dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      return static_cast<int>(i);
423dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
424dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return -1;
425dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
426dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
427dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenint GetUrlHashIndex(const GURL& url,
428dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                    const std::vector<SBFullHashResult>& full_hashes) {
429c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (full_hashes.empty())
430c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return -1;
431c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
432c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::vector<std::string> hosts, paths;
433c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GenerateHostsToCheck(url, &hosts);
434c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GeneratePathsToCheck(url, &paths);
435c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
436c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t h = 0; h < hosts.size(); ++h) {
437c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    for (size_t p = 0; p < paths.size(); ++p) {
438c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      SBFullHash key;
439ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      crypto::SHA256HashString(hosts[h] + paths[p],
440ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                               key.full_hash,
441ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                               sizeof(SBFullHash));
442dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      int index = GetHashIndex(key, full_hashes);
443dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      if (index != -1) return index;
444c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
445c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
446c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
447c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return -1;
448c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
449c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
450c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsPhishingList(const std::string& list_name) {
451ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return list_name.compare(kPhishingList) == 0;
452c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
453c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
454c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsMalwareList(const std::string& list_name) {
455ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return list_name.compare(kMalwareList) == 0;
456c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
457c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
45821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool IsBadbinurlList(const std::string& list_name) {
459ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return list_name.compare(kBinUrlList) == 0;
460dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
461dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
462dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool IsBadbinhashList(const std::string& list_name) {
463ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return list_name.compare(kBinHashList) == 0;
46421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
46521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
466c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic void DecodeWebSafe(std::string* decoded) {
467c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(decoded);
468c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (std::string::iterator i(decoded->begin()); i != decoded->end(); ++i) {
469c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (*i == '_')
470c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      *i = '/';
471c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    else if (*i == '-')
472c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      *i = '+';
473c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
474c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
475c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
476c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool VerifyMAC(const std::string& key, const std::string& mac,
477c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch               const char* data, int data_length) {
478c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string key_copy = key;
479c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DecodeWebSafe(&key_copy);
480c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string decoded_key;
481c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  base::Base64Decode(key_copy, &decoded_key);
482c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
483c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string mac_copy = mac;
484c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DecodeWebSafe(&mac_copy);
485c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string decoded_mac;
486c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  base::Base64Decode(mac_copy, &decoded_mac);
487c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
488ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  crypto::HMAC hmac(crypto::HMAC::SHA1);
489c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!hmac.Init(decoded_key))
490c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
491c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const std::string data_str(data, data_length);
492c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  unsigned char digest[kSafeBrowsingMacDigestSize];
493c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!hmac.Sign(data_str, digest, kSafeBrowsingMacDigestSize))
494c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
495c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
496c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return !memcmp(digest, decoded_mac.data(), kSafeBrowsingMacDigestSize);
497c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
498c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
499c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochGURL GeneratePhishingReportUrl(const std::string& report_page,
500c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                               const std::string& url_to_report) {
501c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  icu::Locale locale = icu::Locale::getDefault();
502c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const char* lang = locale.getLanguage();
503c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!lang)
504c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    lang = "en";  // fallback
505c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const std::string continue_esc =
506c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      EscapeQueryParamValue(StringPrintf(kContinueUrlFormat, lang), true);
507c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const std::string current_esc = EscapeQueryParamValue(url_to_report, true);
508c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
509c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#if defined(OS_WIN)
510c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  BrowserDistribution* dist = BrowserDistribution::GetDistribution();
511c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string client_name(dist->GetSafeBrowsingName());
512c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#else
513c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::string client_name("googlechrome");
514c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif
515c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
516c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GURL report_url(report_page +
517c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      StringPrintf(kReportParams, client_name.c_str(), continue_esc.c_str(),
518c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                   current_esc.c_str()));
519c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return google_util::AppendGoogleLocaleParam(report_url);
520c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
521c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
522dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out) {
523ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  DCHECK_EQ(static_cast<size_t>(crypto::SHA256_LENGTH), hash_in.size());
524ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  memcpy(hash_out->full_hash, hash_in.data(), crypto::SHA256_LENGTH);
525dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
526dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
527ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::string SBFullHashToString(const SBFullHash& hash) {
528ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  DCHECK_EQ(static_cast<size_t>(crypto::SHA256_LENGTH), sizeof(hash.full_hash));
529ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return std::string(hash.full_hash, sizeof(hash.full_hash));
530ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
531c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace safe_browsing_util
532