15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/safe_browsing_util.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
8868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h"
9868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/stringprintf.h"
10f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "chrome/browser/browser_process.h"
11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "chrome/browser/safe_browsing/chunk.pb.h"
126d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)#include "components/google/core/browser/google_util.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "crypto/sha2.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/escape.h"
15eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/gurl.h"
16eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/url_util.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_WIN)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/installer/util/browser_distribution.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char kReportParams[] = "?tpl=%s&url=%s";
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)SBFullHash SBFullHashForString(const base::StringPiece& str) {
25a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  SBFullHash h;
26a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  crypto::SHA256HashString(str, &h.full_hash, sizeof(h.full_hash));
27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return h;
28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
29a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
30f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// SBChunkData -----------------------------------------------------------------
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
32f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// TODO(shess): Right now this contains a scoped_ptr<ChunkData> so that the
33f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// proto buffer isn't copied all over the place, then these are contained in a
34f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// ScopedVector for purposes of passing things around between tasks.  This seems
35f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// convoluted.  Maybe it would make sense to have an overall container class
36f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// returning references to a nested per-chunk class?
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBChunkData::SBChunkData() {
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBChunkData::SBChunkData(safe_browsing::ChunkData* raw_data)
42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    : chunk_data_(raw_data) {
43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK(chunk_data_.get());
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBChunkData::~SBChunkData() {
47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool SBChunkData::ParseFrom(const unsigned char* data, size_t length) {
50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  scoped_ptr<safe_browsing::ChunkData> chunk(new safe_browsing::ChunkData());
51f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (!chunk->ParseFromArray(data, length))
52f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return false;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
54f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (chunk->chunk_type() != safe_browsing::ChunkData::ADD &&
55f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      chunk->chunk_type() != safe_browsing::ChunkData::SUB) {
56f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return false;
57f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  size_t hash_size = 0;
60f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (chunk->prefix_type() == safe_browsing::ChunkData::PREFIX_4B) {
61f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    hash_size = sizeof(SBPrefix);
62f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  } else if (chunk->prefix_type() == safe_browsing::ChunkData::FULL_32B) {
63f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    hash_size = sizeof(SBFullHash);
64f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  } else {
65f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return false;
66f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const size_t hash_count = chunk->hashes().size() / hash_size;
69f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (hash_count * hash_size != chunk->hashes().size())
70f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return false;
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
72f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (chunk->chunk_type() == safe_browsing::ChunkData::SUB &&
73f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      static_cast<size_t>(chunk->add_numbers_size()) != hash_count) {
74f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return false;
75f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
77f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  chunk_data_.swap(chunk);
78f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return true;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
81f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)int SBChunkData::ChunkNumber() const {
82f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->chunk_number();
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool SBChunkData::IsAdd() const {
86f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->chunk_type() == safe_browsing::ChunkData::ADD;
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
89f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool SBChunkData::IsSub() const {
90f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->chunk_type() == safe_browsing::ChunkData::SUB;
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
93f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)int SBChunkData::AddChunkNumberAt(size_t i) const {
94f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK(IsSub());
95f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK((IsPrefix() && i < PrefixCount()) ||
96f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)         (IsFullHash() && i < FullHashCount()));
97f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->add_numbers(i);
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
100f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool SBChunkData::IsPrefix() const {
101f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->prefix_type() == safe_browsing::ChunkData::PREFIX_4B;
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
104f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)size_t SBChunkData::PrefixCount() const {
105f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK(IsPrefix());
106f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->hashes().size() / sizeof(SBPrefix);
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
109f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBPrefix SBChunkData::PrefixAt(size_t i) const {
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(IsPrefix());
111f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK_LT(i, PrefixCount());
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
113f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  SBPrefix prefix;
114f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  memcpy(&prefix, chunk_data_->hashes().data() + i * sizeof(SBPrefix),
115f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)         sizeof(SBPrefix));
116f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return prefix;
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
119f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool SBChunkData::IsFullHash() const {
120f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->prefix_type() == safe_browsing::ChunkData::FULL_32B;
121f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)size_t SBChunkData::FullHashCount() const {
124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK(IsFullHash());
125f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return chunk_data_->hashes().size() / sizeof(SBFullHash);
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
128f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBFullHash SBChunkData::FullHashAt(size_t i) const {
129f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK(IsFullHash());
130f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK_LT(i, FullHashCount());
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
132f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  SBFullHash full_hash;
133f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  memcpy(&full_hash, chunk_data_->hashes().data() + i * sizeof(SBFullHash),
134f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)         sizeof(SBFullHash));
135f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return full_hash;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
138f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// SBListChunkRanges -----------------------------------------------------------
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
140f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBListChunkRanges::SBListChunkRanges(const std::string& n)
141f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    : name(n) {
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
144f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// SBChunkDelete ---------------------------------------------------------------
145f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
146f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBChunkDelete::SBChunkDelete() : is_sub_del(false) {}
147f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
148f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)SBChunkDelete::~SBChunkDelete() {}
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Utility functions -----------------------------------------------------------
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace {
153f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)bool IsKnownList(const std::string& name) {
154f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (size_t i = 0; i < arraysize(safe_browsing_util::kAllLists); ++i) {
155f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if (!strcmp(safe_browsing_util::kAllLists[i], name.c_str())) {
156f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      return true;
157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  return false;
160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}  // namespace
162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing_util {
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Names of the lists the browser is able to process.
const char kMalwareList[] = "goog-malware-shavar";
const char kPhishingList[] = "goog-phish-shavar";
const char kBinUrlList[] = "goog-badbinurl-shavar";
const char kCsdWhiteList[] = "goog-csdwhite-sha256";
const char kDownloadWhiteList[] = "goog-downloadwhite-digest256";
const char kExtensionBlacklist[] = "goog-badcrxids-digestvar";
const char kSideEffectFreeWhitelist[] = "goog-sideeffectfree-shavar";
const char kIPBlacklist[] = "goog-badip-digest256";

// Every list name above, gathered into one table. The explicit size must be
// kept in step with the number of entries.
const char* kAllLists[8] = {
    kMalwareList,
    kPhishingList,
    kBinUrlList,
    kCsdWhiteList,
    kDownloadWhiteList,
    kExtensionBlacklist,
    kSideEffectFreeWhitelist,
    kIPBlacklist,
};
185f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
186f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)ListType GetListId(const base::StringPiece& name) {
1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ListType id;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (name == safe_browsing_util::kMalwareList) {
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    id = MALWARE;
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (name == safe_browsing_util::kPhishingList) {
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    id = PHISH;
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (name == safe_browsing_util::kBinUrlList) {
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    id = BINURL;
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (name == safe_browsing_util::kCsdWhiteList) {
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    id = CSDWHITELIST;
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (name == safe_browsing_util::kDownloadWhiteList) {
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    id = DOWNLOADWHITELIST;
1982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  } else if (name == safe_browsing_util::kExtensionBlacklist) {
1992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    id = EXTENSIONBLACKLIST;
20090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  } else if (name == safe_browsing_util::kSideEffectFreeWhitelist) {
20190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    id = SIDEEFFECTFREEWHITELIST;
2020f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)  } else if (name == safe_browsing_util::kIPBlacklist) {
2030f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)    id = IPBLACKLIST;
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    id = INVALID;
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return id;
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool GetListName(ListType list_id, std::string* list) {
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (list_id) {
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case MALWARE:
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *list = safe_browsing_util::kMalwareList;
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case PHISH:
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *list = safe_browsing_util::kPhishingList;
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BINURL:
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *list = safe_browsing_util::kBinUrlList;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case CSDWHITELIST:
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *list = safe_browsing_util::kCsdWhiteList;
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case DOWNLOADWHITELIST:
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *list = safe_browsing_util::kDownloadWhiteList;
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    case EXTENSIONBLACKLIST:
2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      *list = safe_browsing_util::kExtensionBlacklist;
2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      break;
23090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    case SIDEEFFECTFREEWHITELIST:
23190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      *list = safe_browsing_util::kSideEffectFreeWhitelist;
23290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      break;
2330f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)    case IPBLACKLIST:
2340f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)      *list = safe_browsing_util::kIPBlacklist;
2350f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)      break;
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
239f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  DCHECK(IsKnownList(*list));
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string Unescape(const std::string& url) {
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string unescaped_str(url);
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string old_unescaped_str;
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kMaxLoopIterations = 1024;
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int loop_var = 0;
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  do {
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    old_unescaped_str = unescaped_str;
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unescaped_str = net::UnescapeURLComponent(old_unescaped_str,
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        net::UnescapeRule::CONTROL_CHARS | net::UnescapeRule::SPACES |
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        net::UnescapeRule::URL_SPECIAL_CHARS);
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } while (unescaped_str != old_unescaped_str && ++loop_var <=
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           kMaxLoopIterations);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return unescaped_str;
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Percent-encodes |url|: every byte that is <= ' ', > '~', '#' or '%' is
// replaced by "%XX" (two uppercase hex digits); all other bytes are copied
// through unchanged.
std::string Escape(const std::string& url) {
  const char kHexDigits[] = "0123456789ABCDEF";

  std::string result;
  for (std::string::const_iterator it = url.begin(); it != url.end(); ++it) {
    const unsigned char byte = static_cast<unsigned char>(*it);
    const bool passes_through =
        byte > ' ' && byte <= '~' && byte != '#' && byte != '%';
    if (passes_through) {
      result += static_cast<char>(byte);
      continue;
    }
    result += '%';
    result += kHexDigits[byte >> 4];
    result += kHexDigits[byte & 0xf];
  }

  return result;
}
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Returns a copy of |str| in which every run of two or more consecutive |c|
// characters is collapsed to a single |c|. All other characters are copied
// unchanged.
//
// Done in one linear pass; erasing one character at a time from the middle of
// the string would be quadratic on long runs.
std::string RemoveConsecutiveChars(const std::string& str, const char c) {
  std::string output;
  output.reserve(str.size());

  bool previous_was_c = false;
  for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
    const bool is_c = (*it == c);
    // Drop |c| only when it directly follows another |c|.
    if (!(is_c && previous_was_c))
      output.push_back(*it);
    previous_was_c = is_c;
  }

  return output;
}
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Canonicalizes url as per Google Safe Browsing Specification.
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// See section 6.1 in
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CanonicalizeUrl(const GURL& url,
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     std::string* canonicalized_hostname,
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     std::string* canonicalized_path,
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     std::string* canonicalized_query) {
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(url.is_valid());
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We only canonicalize "normal" URLs.
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!url.IsStandard())
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Following canonicalization steps are excluded since url parsing takes care
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // of those :-
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 1. Remove any tab (0x09), CR (0x0d), and LF (0x0a) chars from url.
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //    (Exclude escaped version of these chars).
  // 2. Normalize hostname to 4 dot-separated decimal values.
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 3. Lowercase hostname.
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 4. Resolve path sequences "/../" and "/./".
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // That leaves us with the following :-
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 1. Remove fragment in URL.
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL url_without_fragment;
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL::Replacements f_replacements;
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  f_replacements.ClearRef();
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  f_replacements.ClearUsername();
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  f_replacements.ClearPassword();
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  url_without_fragment = url.ReplaceComponents(f_replacements);
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 2. Do URL unescaping until no more hex encoded characters exist.
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string url_unescaped_str(Unescape(url_without_fragment.spec()));
3205c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::Parsed parsed;
3215c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::ParseStandardURL(url_unescaped_str.data(), url_unescaped_str.length(),
3225c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                        &parsed);
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 3. In hostname, remove all leading and trailing dots.
325c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  const std::string host =
326c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      (parsed.host.len > 0)
327c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          ? url_unescaped_str.substr(parsed.host.begin, parsed.host.len)
328c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          : std::string();
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string host_without_end_dots;
330cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  base::TrimString(host, ".", &host_without_end_dots);
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 4. In hostname, replace consecutive dots with a single dot.
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string host_without_consecutive_dots(RemoveConsecutiveChars(
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      host_without_end_dots, '.'));
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 5. In path, replace runs of consecutive slashes with a single slash.
337c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  std::string path =
338c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      (parsed.path.len > 0)
339c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          ? url_unescaped_str.substr(parsed.path.begin, parsed.path.len)
340c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          : std::string();
341c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  std::string path_without_consecutive_slash(RemoveConsecutiveChars(path, '/'));
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3435c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::Replacements<char> hp_replacements;
3445c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  hp_replacements.SetHost(
3455c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu      host_without_consecutive_dots.data(),
3465c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu      url::Component(0, host_without_consecutive_dots.length()));
3475c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  hp_replacements.SetPath(
3485c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu      path_without_consecutive_slash.data(),
3495c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu      url::Component(0, path_without_consecutive_slash.length()));
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string url_unescaped_with_can_hostpath;
3525c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::StdStringCanonOutput output(&url_unescaped_with_can_hostpath);
3535c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::Parsed temp_parsed;
3545c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::ReplaceComponents(url_unescaped_str.data(),
3555c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                         url_unescaped_str.length(),
3565c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                         parsed,
3575c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                         hp_replacements,
3585c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                         NULL,
3595c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                         &output,
3605c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                         &temp_parsed);
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  output.Complete();
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3635c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  // 6. Step needed to revert escaping done in url::ReplaceComponents.
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  url_unescaped_with_can_hostpath = Unescape(url_unescaped_with_can_hostpath);
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // 7. After performing all above steps, percent-escape all chars in url which
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // are <= ASCII 32, >= 127, #, %. Escapes must be uppercase hex characters.
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string escaped_canon_url_str(Escape(url_unescaped_with_can_hostpath));
3695c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::Parsed final_parsed;
3705c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  url::ParseStandardURL(escaped_canon_url_str.data(),
3715c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                        escaped_canon_url_str.length(),
3725c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                        &final_parsed);
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (canonicalized_hostname && final_parsed.host.len > 0) {
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *canonicalized_hostname =
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        escaped_canon_url_str.substr(final_parsed.host.begin,
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     final_parsed.host.len);
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (canonicalized_path && final_parsed.path.len > 0) {
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *canonicalized_path = escaped_canon_url_str.substr(final_parsed.path.begin,
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                       final_parsed.path.len);
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (canonicalized_query && final_parsed.query.len > 0) {
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *canonicalized_query = escaped_canon_url_str.substr(
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        final_parsed.query.begin, final_parsed.query.len);
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts) {
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hosts->clear();
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string canon_host;
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CanonicalizeUrl(url, &canon_host, NULL, NULL);
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string host = canon_host;  // const sidesteps GCC bugs below!
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (host.empty())
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Per the Safe Browsing Protocol v2 spec, we try the host, and also up to 4
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // hostnames formed by starting with the last 5 components and successively
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // removing the leading component.  The last component isn't examined alone,
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // since it's the TLD or a subcomponent thereof.
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note that we don't need to be clever about stopping at the "real" eTLD --
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the data on the server side has been filtered to ensure it will not
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // blacklist a whole TLD, and it's not significantly slower on our side to
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // just check too much.
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Also note that because we have a simple blacklist, not some sort of complex
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // whitelist-in-blacklist or vice versa, it doesn't matter what order we check
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // these in.
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t kMaxHostsToCheck = 4;
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool skipped_last_component = false;
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (std::string::const_reverse_iterator i(host.rbegin());
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       i != host.rend() && hosts->size() < kMaxHostsToCheck; ++i) {
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*i == '.') {
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (skipped_last_component)
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        hosts->push_back(std::string(i.base(), host.end()));
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      else
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        skipped_last_component = true;
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hosts->push_back(host);
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths) {
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  paths->clear();
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string canon_path;
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string canon_query;
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CanonicalizeUrl(url, NULL, &canon_path, &canon_query);
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string path = canon_path;   // const sidesteps GCC bugs below!
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string query = canon_query;
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (path.empty())
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Per the Safe Browsing Protocol v2 spec, we try the exact path with/without
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the query parameters, and also up to 4 paths formed by starting at the root
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // and adding more path components.
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // As with the hosts above, it doesn't matter what order we check these in.
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t kMaxPathsToCheck = 4;
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (std::string::const_iterator i(path.begin());
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       i != path.end() && paths->size() < kMaxPathsToCheck; ++i) {
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*i == '/')
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      paths->push_back(std::string(path.begin(), i + 1));
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!paths->empty() && paths->back() != path)
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    paths->push_back(path);
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!query.empty())
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    paths->push_back(path + "?" + query);
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls) {
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<std::string> hosts, paths;
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GenerateHostsToCheck(url, &hosts);
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GeneratePathsToCheck(url, &paths);
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t h = 0; h < hosts.size(); ++h) {
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t p = 0; p < paths.size(); ++p) {
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      urls->push_back(hosts[h] + paths[p]);
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)GURL GeneratePhishingReportUrl(const std::string& report_page,
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               const std::string& url_to_report,
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               bool is_client_side_detection) {
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string current_esc = net::EscapeQueryParamValue(url_to_report,
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                             true);
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_WIN)
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BrowserDistribution* dist = BrowserDistribution::GetDistribution();
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string client_name(dist->GetSafeBrowsingName());
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string client_name("googlechrome");
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (is_client_side_detection)
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    client_name.append("_csd");
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL report_url(report_page + base::StringPrintf(kReportParams,
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                   client_name.c_str(),
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                   current_esc.c_str()));
486f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return google_util::AppendGoogleLocaleParam(
487f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      report_url, g_browser_process->GetApplicationLocale());
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)SBFullHash StringToSBFullHash(const std::string& hash_in) {
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(crypto::kSHA256Length, hash_in.size());
4922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  SBFullHash hash_out;
4932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  memcpy(hash_out.full_hash, hash_in.data(), crypto::kSHA256Length);
4942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return hash_out;
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string SBFullHashToString(const SBFullHash& hash) {
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_EQ(crypto::kSHA256Length, sizeof(hash.full_hash));
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return std::string(hash.full_hash, sizeof(hash.full_hash));
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace safe_browsing_util
503