1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/safe_browsing/safe_browsing_util.h" 6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/base64.h" 8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h" 9ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "crypto/hmac.h" 10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "crypto/sha2.h" 113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "chrome/browser/google/google_util.h" 12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "googleurl/src/gurl.h" 13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "googleurl/src/url_util.h" 14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "net/base/escape.h" 15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "unicode/locid.h" 16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#if defined(OS_WIN) 18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/installer/util/browser_distribution.h" 19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif 20c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic const int kSafeBrowsingMacDigestSize = 20; 22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 23c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Continue to this URL after submitting the phishing report form. 24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// TODO(paulg): Change to a Chrome specific URL. 25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic const char kContinueUrlFormat[] = 26c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "http://www.google.com/tools/firefox/toolbar/FT2/intl/%s/submit_success.html"; 27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic const char kReportParams[] = "?tpl=%s&continue=%s&url=%s"; 29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 30731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// SBChunk --------------------------------------------------------------------- 31731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 32731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunk::SBChunk() 33731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick : chunk_number(0), 34731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick list_id(0), 35731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick is_add(false) { 36731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick} 37731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 38731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunk::~SBChunk() {} 39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// SBChunkList ----------------------------------------------------------------- 41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 42731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkList::SBChunkList() {} 43731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 44731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkList::~SBChunkList() { 45731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick clear(); 46731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick} 47731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBChunkList::clear() { 49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (std::vector<SBChunk>::iterator citer = chunks_.begin(); 50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch citer != chunks_.end(); ++citer) { 51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (std::deque<SBChunkHost>::iterator hiter = citer->hosts.begin(); 52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hiter != citer->hosts.end(); ++hiter) { 53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (hiter->entry) { 54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hiter->entry->Destroy(); 55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hiter->entry = NULL; 56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch chunks_.clear(); 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 62731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// SBListChunkRanges ----------------------------------------------------------- 63731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 64731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBListChunkRanges::SBListChunkRanges(const std::string& n) : name(n) {} 65731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 66731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// SBChunkDelete --------------------------------------------------------------- 67731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 68731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkDelete::SBChunkDelete() : is_sub_del(false) {} 69731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 70731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickSBChunkDelete::~SBChunkDelete() {} 71731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// SBEntry --------------------------------------------------------------------- 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochSBEntry* SBEntry::Create(Type type, int prefix_count) { 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int size = Size(type, prefix_count); 77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch SBEntry *rv = static_cast<SBEntry*>(malloc(size)); 78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch memset(rv, 0, size); 79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch rv->set_type(type); 80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch rv->set_prefix_count(prefix_count); 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return rv; 82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::Destroy() { 85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch free(this); 86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::PrefixSize(Type type) { 90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch switch (type) { 91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case ADD_PREFIX: 92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return sizeof(SBPrefix); 93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case ADD_FULL_HASH: 94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return sizeof(SBFullHash); 95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case SUB_PREFIX: 96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return sizeof(SBSubPrefix); 97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case SUB_FULL_HASH: 98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return sizeof(SBSubFullHash); 99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch default: 100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NOTREACHED(); 101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return 0; 102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::Size() const { 106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return Size(type(), prefix_count()); 107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static 110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::Size(Type type, int prefix_count) { 111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return sizeof(Data) + prefix_count * PrefixSize(type); 112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint SBEntry::ChunkIdAtPrefix(int index) const { 115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (type() == SUB_PREFIX) 116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return sub_prefixes_[index].add_chunk; 117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return (type() == SUB_FULL_HASH) ? 118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sub_full_hashes_[index].add_chunk : chunk_id(); 119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::SetChunkIdAtPrefix(int index, int chunk_id) { 122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(IsSub()); 123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (type() == SUB_PREFIX) 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sub_prefixes_[index].add_chunk = chunk_id; 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch else 127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sub_full_hashes_[index].add_chunk = chunk_id; 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst SBPrefix& SBEntry::PrefixAt(int index) const { 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(IsPrefix()); 132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return IsAdd() ? add_prefixes_[index] : sub_prefixes_[index].prefix; 134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst SBFullHash& SBEntry::FullHashAt(int index) const { 137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(!IsPrefix()); 138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return IsAdd() ? add_full_hashes_[index] : sub_full_hashes_[index].prefix; 140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::SetPrefixAt(int index, const SBPrefix& prefix) { 143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(IsPrefix()); 144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (IsAdd()) 146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch add_prefixes_[index] = prefix; 147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch else 148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sub_prefixes_[index].prefix = prefix; 149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid SBEntry::SetFullHashAt(int index, const SBFullHash& full_hash) { 152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(!IsPrefix()); 153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (IsAdd()) 155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch add_full_hashes_[index] = full_hash; 156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch else 157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sub_full_hashes_[index].prefix = full_hash; 158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Utility functions ----------------------------------------------------------- 162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace safe_browsing_util { 164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 165dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Listnames that browser can process. 166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst char kMalwareList[] = "goog-malware-shavar"; 167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst char kPhishingList[] = "goog-phish-shavar"; 16821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenconst char kBinUrlList[] = "goog-badbinurl-shavar"; 169ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenconst char kBinHashList[] = "goog-badbin-digestvar"; 170ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenconst char kCsdWhiteList[] = "goog-csdwhite-sha256"; 171dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint GetListId(const std::string& name) { 17321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen int id; 17421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (name == safe_browsing_util::kMalwareList) { 17521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen id = MALWARE; 17621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } else if (name == safe_browsing_util::kPhishingList) { 17721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen id = PHISH; 17821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } else if (name == safe_browsing_util::kBinUrlList) { 17921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen id = BINURL; 180ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } else if (name == safe_browsing_util::kBinHashList) { 18121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen id = BINHASH; 182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } else if (name == safe_browsing_util::kCsdWhiteList) { 183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen id = CSDWHITELIST; 18421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } else { 18521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen id = INVALID; 18621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 18721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return id; 188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 19021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool GetListName(int list_id, std::string* list) { 19121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen switch (list_id) { 19221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case MALWARE: 19321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen *list = safe_browsing_util::kMalwareList; 19421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 19521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case PHISH: 19621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen *list = safe_browsing_util::kPhishingList; 19721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 19821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case BINURL: 19921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen *list = safe_browsing_util::kBinUrlList; 20021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 20121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case BINHASH: 20221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen *list = safe_browsing_util::kBinHashList; 20321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 204ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen case CSDWHITELIST: 205ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen *list = safe_browsing_util::kCsdWhiteList; 206ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen break; 20721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen default: 20821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return false; 20921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 21021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return true; 211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::string Unescape(const std::string& url) { 214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string unescaped_str(url); 215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string old_unescaped_str; 216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const int kMaxLoopIterations = 1024; 217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int loop_var = 0; 218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch do { 219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch old_unescaped_str = unescaped_str; 220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unescaped_str = UnescapeURLComponent(old_unescaped_str, 221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UnescapeRule::CONTROL_CHARS | UnescapeRule::SPACES | 222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UnescapeRule::URL_SPECIAL_CHARS); 223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } while (unescaped_str != old_unescaped_str && ++loop_var <= 224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch kMaxLoopIterations); 225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return unescaped_str; 227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::string Escape(const std::string& url) { 230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string escaped_str; 231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const char* kHexString = "0123456789ABCDEF"; 232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < url.length(); i++) { 233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unsigned char c = static_cast<unsigned char>(url[i]); 234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (c <= ' ' || c > '~' || c == '#' || c == '%') { 235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escaped_str.push_back('%'); 236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escaped_str.push_back(kHexString[c >> 4]); 237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escaped_str.push_back(kHexString[c & 0xf]); 238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escaped_str.push_back(c); 240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return escaped_str; 244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::string RemoveConsecutiveChars(const std::string& str, const char c) { 247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string output(str); 248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string string_to_find; 249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string::size_type loc = 0; 250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string_to_find.append(2, c); 251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while ((loc = output.find(string_to_find, loc)) != std::string::npos) { 252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output.erase(loc, 1); 253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return output; 256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Canonicalizes url as per Google Safe Browsing Specification. 259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// See section 6.1 in 260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid CanonicalizeUrl(const GURL& url, 262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string* canonicalized_hostname, 263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string* canonicalized_path, 264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string* canonicalized_query) { 265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(url.is_valid()); 266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We only canonicalize "normal" URLs. 268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!url.IsStandard()) 269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Following canonicalization steps are excluded since url parsing takes care 272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // of those :- 273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 1. Remove any tab (0x09), CR (0x0d), and LF (0x0a) chars from url. 274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // (Exclude escaped version of these chars). 275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 2. Normalize hostname to 4 dot-seperated decimal values. 276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 3. Lowercase hostname. 277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 4. Resolve path sequences "/../" and "/./". 278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // That leaves us with the following :- 280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 1. Remove fragment in URL. 281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GURL url_without_fragment; 282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GURL::Replacements f_replacements; 283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch f_replacements.ClearRef(); 284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch f_replacements.ClearUsername(); 285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch f_replacements.ClearPassword(); 286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_without_fragment = url.ReplaceComponents(f_replacements); 287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 2. Do URL unescaping until no more hex encoded characters exist. 289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string url_unescaped_str(Unescape(url_without_fragment.spec())); 290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::Parsed parsed; 291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::ParseStandardURL(url_unescaped_str.data(), 292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_unescaped_str.length(), &parsed); 293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 3. In hostname, remove all leading and trailing dots. 295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string host = (parsed.host.len > 0) ? url_unescaped_str.substr( 296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch parsed.host.begin, parsed.host.len) : ""; 297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const char kCharsToTrim[] = "."; 298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string host_without_end_dots; 299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch TrimString(host, kCharsToTrim, &host_without_end_dots); 300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 4. In hostname, replace consecutive dots with a single dot. 302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string host_without_consecutive_dots(RemoveConsecutiveChars( 303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch host_without_end_dots, '.')); 304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 5. In path, replace runs of consecutive slashes with a single slash. 306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string path = (parsed.path.len > 0) ? url_unescaped_str.substr( 307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch parsed.path.begin, parsed.path.len): ""; 308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string path_without_consecutive_slash(RemoveConsecutiveChars( 309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch path, '/')); 310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::Replacements<char> hp_replacements; 312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hp_replacements.SetHost(host_without_consecutive_dots.data(), 313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::Component(0, host_without_consecutive_dots.length())); 314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hp_replacements.SetPath(path_without_consecutive_slash.data(), 315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::Component(0, path_without_consecutive_slash.length())); 316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string url_unescaped_with_can_hostpath; 318c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::StdStringCanonOutput output(&url_unescaped_with_can_hostpath); 319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::Parsed temp_parsed; 320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_util::ReplaceComponents(url_unescaped_str.data(), 321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_unescaped_str.length(), parsed, 322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hp_replacements, NULL, &output, &temp_parsed); 323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output.Complete(); 324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 325c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 6. Step needed to revert escaping done in url_util::ReplaceComponents. 326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_unescaped_with_can_hostpath = Unescape(url_unescaped_with_can_hostpath); 327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 328c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 7. After performing all above steps, percent-escape all chars in url which 329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // are <= ASCII 32, >= 127, #, %. Escapes must be uppercase hex characters. 330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string escaped_canon_url_str(Escape(url_unescaped_with_can_hostpath)); 331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::Parsed final_parsed; 332c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::ParseStandardURL(escaped_canon_url_str.data(), 333c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escaped_canon_url_str.length(), &final_parsed); 334c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (canonicalized_hostname && final_parsed.host.len > 0) { 336c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *canonicalized_hostname = 337c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escaped_canon_url_str.substr(final_parsed.host.begin, 338c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch final_parsed.host.len); 339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 340c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (canonicalized_path && final_parsed.path.len > 0) { 341c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *canonicalized_path = escaped_canon_url_str.substr(final_parsed.path.begin, 342c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch final_parsed.path.len); 343c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 344c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (canonicalized_query && final_parsed.query.len > 0) { 345c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *canonicalized_query = escaped_canon_url_str.substr( 346c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch final_parsed.query.begin, final_parsed.query.len); 347c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 348c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 349c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 350c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts) { 351c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hosts->clear(); 352c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 353c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string canon_host; 354c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch CanonicalizeUrl(url, &canon_host, NULL, NULL); 355c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 356c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string host = canon_host; // const sidesteps GCC bugs below! 357c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (host.empty()) 358c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 359c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 360c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Per the Safe Browsing Protocol v2 spec, we try the host, and also up to 4 361c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // hostnames formed by starting with the last 5 components and successively 362c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // removing the leading component. The last component isn't examined alone, 363c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // since it's the TLD or a subcomponent thereof. 364c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 365c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Note that we don't need to be clever about stopping at the "real" eTLD -- 366c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the data on the server side has been filtered to ensure it will not 367c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // blacklist a whole TLD, and it's not significantly slower on our side to 368c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // just check too much. 369c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 370c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Also note that because we have a simple blacklist, not some sort of complex 371c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // whitelist-in-blacklist or vice versa, it doesn't matter what order we check 372c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // these in. 373c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const size_t kMaxHostsToCheck = 4; 374c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool skipped_last_component = false; 375c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (std::string::const_reverse_iterator i(host.rbegin()); 376c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i != host.rend() && hosts->size() < kMaxHostsToCheck; ++i) { 377c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (*i == '.') { 378c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (skipped_last_component) 379c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hosts->push_back(std::string(i.base(), host.end())); 380c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch else 381c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch skipped_last_component = true; 382c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 383c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 384c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hosts->push_back(host); 385c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 386c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 387c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) { 388c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch paths->clear(); 389c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 390c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string canon_path; 391c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string canon_query; 392c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch CanonicalizeUrl(url, NULL, &canon_path, &canon_query); 393c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 394c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string path = canon_path; // const sidesteps GCC bugs below! 395c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string query = canon_query; 396c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (path.empty()) 397c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 398c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 399c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Per the Safe Browsing Protocol v2 spec, we try the exact path with/without 400c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the query parameters, and also up to 4 paths formed by starting at the root 401c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // and adding more path components. 402c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 403c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // As with the hosts above, it doesn't matter what order we check these in. 404c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const size_t kMaxPathsToCheck = 4; 405c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (std::string::const_iterator i(path.begin()); 406c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i != path.end() && paths->size() < kMaxPathsToCheck; ++i) { 407c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (*i == '/') 408c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch paths->push_back(std::string(path.begin(), i + 1)); 409c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 410c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 411c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!paths->empty() && paths->back() != path) 412c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch paths->push_back(path); 413c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 414c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!query.empty()) 415c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch paths->push_back(path + "?" + query); 416c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 417c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 418dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenint GetHashIndex(const SBFullHash& hash, 419dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const std::vector<SBFullHashResult>& full_hashes) { 420dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (size_t i = 0; i < full_hashes.size(); ++i) { 421dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (hash == full_hashes[i].hash) 422dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return static_cast<int>(i); 423dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 424dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return -1; 425dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 426dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 427dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenint GetUrlHashIndex(const GURL& url, 428dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const std::vector<SBFullHashResult>& full_hashes) { 429c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (full_hashes.empty()) 430c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return -1; 431c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 432c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::vector<std::string> hosts, paths; 433c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GenerateHostsToCheck(url, &hosts); 434c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GeneratePathsToCheck(url, &paths); 435c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 436c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t h = 0; h < hosts.size(); ++h) { 437c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t p = 0; p < paths.size(); ++p) { 438c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch SBFullHash key; 439ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen crypto::SHA256HashString(hosts[h] + paths[p], 440ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen key.full_hash, 441ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen sizeof(SBFullHash)); 442dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen int index = GetHashIndex(key, full_hashes); 443dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (index != -1) return index; 444c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 445c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 446c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 447c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return -1; 448c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 449c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 450c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsPhishingList(const std::string& list_name) { 451ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return list_name.compare(kPhishingList) == 0; 452c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 453c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 454c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsMalwareList(const std::string& list_name) { 455ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return list_name.compare(kMalwareList) == 0; 456c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 457c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 45821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool IsBadbinurlList(const std::string& list_name) { 459ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return list_name.compare(kBinUrlList) == 0; 460dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 461dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 462dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool IsBadbinhashList(const std::string& list_name) { 463ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return list_name.compare(kBinHashList) == 0; 46421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 46521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 466c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic void DecodeWebSafe(std::string* decoded) { 467c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(decoded); 468c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (std::string::iterator i(decoded->begin()); i != decoded->end(); ++i) { 469c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (*i == '_') 470c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *i = '/'; 471c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch else if (*i == '-') 472c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *i = '+'; 473c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 474c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 475c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 476c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool VerifyMAC(const std::string& key, const std::string& mac, 477c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const char* data, int data_length) { 478c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string key_copy = key; 479c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DecodeWebSafe(&key_copy); 480c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string decoded_key; 481c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch base::Base64Decode(key_copy, &decoded_key); 482c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 483c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string mac_copy = mac; 484c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DecodeWebSafe(&mac_copy); 485c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string decoded_mac; 486c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch base::Base64Decode(mac_copy, &decoded_mac); 487c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 488ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen crypto::HMAC hmac(crypto::HMAC::SHA1); 489c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!hmac.Init(decoded_key)) 490c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 491c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string data_str(data, data_length); 492c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unsigned char digest[kSafeBrowsingMacDigestSize]; 493c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!hmac.Sign(data_str, digest, kSafeBrowsingMacDigestSize)) 494c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 495c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 496c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return !memcmp(digest, decoded_mac.data(), kSafeBrowsingMacDigestSize); 497c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 498c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 499c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochGURL GeneratePhishingReportUrl(const std::string& report_page, 500c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string& url_to_report) { 501c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch icu::Locale locale = icu::Locale::getDefault(); 502c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const char* lang = locale.getLanguage(); 503c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!lang) 504c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch lang = "en"; // fallback 505c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string continue_esc = 506c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EscapeQueryParamValue(StringPrintf(kContinueUrlFormat, lang), true); 507c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string current_esc = EscapeQueryParamValue(url_to_report, true); 508c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 509c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#if defined(OS_WIN) 510c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch BrowserDistribution* dist = BrowserDistribution::GetDistribution(); 511c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string client_name(dist->GetSafeBrowsingName()); 512c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#else 513c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string client_name("googlechrome"); 514c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif 515c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 516c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GURL report_url(report_page + 517c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch StringPrintf(kReportParams, client_name.c_str(), continue_esc.c_str(), 518c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch current_esc.c_str())); 519c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return google_util::AppendGoogleLocaleParam(report_url); 520c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 521c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 522dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out) { 523ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_EQ(static_cast<size_t>(crypto::SHA256_LENGTH), hash_in.size()); 524ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen memcpy(hash_out->full_hash, hash_in.data(), crypto::SHA256_LENGTH); 525dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 526dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 527ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::string SBFullHashToString(const SBFullHash& hash) { 528ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_EQ(static_cast<size_t>(crypto::SHA256_LENGTH), sizeof(hash.full_hash)); 529ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return std::string(hash.full_hash, sizeof(hash.full_hash)); 530ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 531c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} // namespace safe_browsing_util 532