1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/base/sdch_manager.h"
6
7#include "base/base64.h"
8#include "base/logging.h"
9#include "base/metrics/histogram.h"
10#include "base/string_number_conversions.h"
11#include "base/string_util.h"
12#include "crypto/sha2.h"
13#include "net/base/registry_controlled_domain.h"
14#include "net/url_request/url_request_http_job.h"
15
16namespace net {
17
18//------------------------------------------------------------------------------
19// static
20const size_t SdchManager::kMaxDictionarySize = 1000000;
21
22// static
23const size_t SdchManager::kMaxDictionaryCount = 20;
24
25// static
26SdchManager* SdchManager::global_;
27
28//------------------------------------------------------------------------------
29SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
30                                    size_t offset,
31                                    const std::string& client_hash,
32                                    const GURL& gurl,
33                                    const std::string& domain,
34                                    const std::string& path,
35                                    const base::Time& expiration,
36                                    const std::set<int>& ports)
37    : text_(dictionary_text, offset),
38      client_hash_(client_hash),
39      url_(gurl),
40      domain_(domain),
41      path_(path),
42      expiration_(expiration),
43      ports_(ports) {
44}
45
46SdchManager::Dictionary::~Dictionary() {
47}
48
49bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
50  if (!SdchManager::Global()->IsInSupportedDomain(target_url))
51    return false;
52  /* The specific rules of when a dictionary should be advertised in an
53     Avail-Dictionary header are modeled after the rules for cookie scoping. The
54     terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
55     dictionary may be advertised in the Avail-Dictionaries header exactly when
56     all of the following are true:
57      1. The server's effective host name domain-matches the Domain attribute of
58         the dictionary.
59      2. If the dictionary has a Port attribute, the request port is one of the
60         ports listed in the Port attribute.
61      3. The request URI path-matches the path header of the dictionary.
62      4. The request is not an HTTPS request.
63    */
64  if (!DomainMatch(target_url, domain_))
65    return false;
66  if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
67    return false;
68  if (path_.size() && !PathMatch(target_url.path(), path_))
69    return false;
70  if (target_url.SchemeIsSecure())
71    return false;
72  if (base::Time::Now() > expiration_)
73    return false;
74  return true;
75}
76
77//------------------------------------------------------------------------------
78// Security functions restricting loads and use of dictionaries.
79
80// static
81bool SdchManager::Dictionary::CanSet(const std::string& domain,
82                                     const std::string& path,
83                                     const std::set<int>& ports,
84                                     const GURL& dictionary_url) {
85  if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
86    return false;
87  /*
88  A dictionary is invalid and must not be stored if any of the following are
89  true:
90    1. The dictionary has no Domain attribute.
91    2. The effective host name that derives from the referer URL host name does
92      not domain-match the Domain attribute.
93    3. The Domain attribute is a top level domain.
94    4. The referer URL host is a host domain name (not IP address) and has the
95      form HD, where D is the value of the Domain attribute, and H is a string
96      that contains one or more dots.
97    5. If the dictionary has a Port attribute and the referer URL's port was not
98      in the list.
99  */
100
101  // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
102  // and hence the conservative approach is to not allow any redirects (if there
103  // were any... then don't allow the dictionary to be set).
104
105  if (domain.empty()) {
106    SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
107    return false;  // Domain is required.
108  }
109  if (RegistryControlledDomainService::GetDomainAndRegistry(domain).size()
110      == 0) {
111    SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
112    return false;  // domain was a TLD.
113  }
114  if (!Dictionary::DomainMatch(dictionary_url, domain)) {
115    SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
116    return false;
117  }
118
119  std::string referrer_url_host = dictionary_url.host();
120  size_t postfix_domain_index = referrer_url_host.rfind(domain);
121  // See if it is indeed a postfix, or just an internal string.
122  if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
123    // It is a postfix... so check to see if there's a dot in the prefix.
124    size_t end_of_host_index = referrer_url_host.find_first_of('.');
125    if (referrer_url_host.npos != end_of_host_index  &&
126        end_of_host_index < postfix_domain_index) {
127      SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
128      return false;
129    }
130  }
131
132  if (!ports.empty()
133      && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
134    SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
135    return false;
136  }
137  return true;
138}
139
140// static
141bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
142  if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
143    return false;
144  /*
145    1. The request URL's host name domain-matches the Domain attribute of the
146      dictionary.
147    2. If the dictionary has a Port attribute, the request port is one of the
148      ports listed in the Port attribute.
149    3. The request URL path-matches the path attribute of the dictionary.
150    4. The request is not an HTTPS request.
151*/
152  if (!DomainMatch(referring_url, domain_)) {
153    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
154    return false;
155  }
156  if (!ports_.empty()
157      && 0 == ports_.count(referring_url.EffectiveIntPort())) {
158    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
159    return false;
160  }
161  if (path_.size() && !PathMatch(referring_url.path(), path_)) {
162    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
163    return false;
164  }
165  if (referring_url.SchemeIsSecure()) {
166    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
167    return false;
168  }
169
170  // TODO(jar): Remove overly restrictive failsafe test (added per security
171  // review) when we have a need to be more general.
172  if (!referring_url.SchemeIs("http")) {
173    SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
174    return false;
175  }
176
177  return true;
178}
179
180bool SdchManager::Dictionary::PathMatch(const std::string& path,
181                                        const std::string& restriction) {
182  /*  Must be either:
183  1. P2 is equal to P1
184  2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
185      character following P2 in P1 is "/".
186      */
187  if (path == restriction)
188    return true;
189  size_t prefix_length = restriction.size();
190  if (prefix_length > path.size())
191    return false;  // Can't be a prefix.
192  if (0 != path.compare(0, prefix_length, restriction))
193    return false;
194  return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
195}
196
197// static
198bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
199                                          const std::string& restriction) {
200  // TODO(jar): This is not precisely a domain match definition.
201  return gurl.DomainIs(restriction.data(), restriction.size());
202}
203
204//------------------------------------------------------------------------------
205SdchManager::SdchManager() : sdch_enabled_(false) {
206  DCHECK(!global_);
207  global_ = this;
208}
209
210SdchManager::~SdchManager() {
211  DCHECK(global_ == this);
212  while (!dictionaries_.empty()) {
213    DictionaryMap::iterator it = dictionaries_.begin();
214    it->second->Release();
215    dictionaries_.erase(it->first);
216  }
217  global_ = NULL;
218}
219
220// static
221void SdchManager::Shutdown() {
222  if (!global_ )
223    return;
224  global_->fetcher_.reset(NULL);
225}
226
227// static
228SdchManager* SdchManager::Global() {
229  return global_;
230}
231
232// static
233void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
234  UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
235}
236
237void SdchManager::EnableSdchSupport(const std::string& domain) {
238  // We presume that there is a SDCH manager instance.
239  global_->supported_domain_ = domain;
240  global_->sdch_enabled_ = true;
241}
242
243// static
244void SdchManager::BlacklistDomain(const GURL& url) {
245  if (!global_ )
246    return;
247  global_->SetAllowLatencyExperiment(url, false);
248
249  std::string domain(StringToLowerASCII(url.host()));
250  int count = global_->blacklisted_domains_[domain];
251  if (count > 0)
252    return;  // Domain is already blacklisted.
253
254  count = 1 + 2 * global_->exponential_blacklist_count[domain];
255  if (count > 0)
256    global_->exponential_blacklist_count[domain] = count;
257  else
258    count = INT_MAX;
259
260  global_->blacklisted_domains_[domain] = count;
261}
262
263// static
264void SdchManager::BlacklistDomainForever(const GURL& url) {
265  if (!global_ )
266    return;
267  global_->SetAllowLatencyExperiment(url, false);
268
269  std::string domain(StringToLowerASCII(url.host()));
270  global_->exponential_blacklist_count[domain] = INT_MAX;
271  global_->blacklisted_domains_[domain] = INT_MAX;
272}
273
274// static
275void SdchManager::ClearBlacklistings() {
276  Global()->blacklisted_domains_.clear();
277  Global()->exponential_blacklist_count.clear();
278}
279
280// static
281void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
282  Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
283}
284
285// static
286int SdchManager::BlackListDomainCount(const std::string& domain) {
287  if (Global()->blacklisted_domains_.end() ==
288      Global()->blacklisted_domains_.find(domain))
289    return 0;
290  return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
291}
292
293// static
294int SdchManager::BlacklistDomainExponential(const std::string& domain) {
295  if (Global()->exponential_blacklist_count.end() ==
296      Global()->exponential_blacklist_count.find(domain))
297    return 0;
298  return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
299}
300
301bool SdchManager::IsInSupportedDomain(const GURL& url) {
302  if (!sdch_enabled_ )
303    return false;
304  if (!supported_domain_.empty() &&
305      !url.DomainIs(supported_domain_.data(), supported_domain_.size()))
306     return false;  // It is not the singular supported domain.
307
308  if (blacklisted_domains_.empty())
309    return true;
310
311  std::string domain(StringToLowerASCII(url.host()));
312  DomainCounter::iterator it = blacklisted_domains_.find(domain);
313  if (blacklisted_domains_.end() == it)
314    return true;
315
316  int count = it->second - 1;
317  if (count > 0)
318    blacklisted_domains_[domain] = count;
319  else
320    blacklisted_domains_.erase(domain);
321  SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
322  return false;
323}
324
325void SdchManager::FetchDictionary(const GURL& request_url,
326                                  const GURL& dictionary_url) {
327  if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
328      fetcher_.get())
329    fetcher_->Schedule(dictionary_url);
330}
331
332bool SdchManager::CanFetchDictionary(const GURL& referring_url,
333                                     const GURL& dictionary_url) const {
334  /* The user agent may retrieve a dictionary from the dictionary URL if all of
335     the following are true:
336       1 The dictionary URL host name matches the referrer URL host name
337       2 The dictionary URL host name domain matches the parent domain of the
338           referrer URL host name
339       3 The parent domain of the referrer URL host name is not a top level
340           domain
341       4 The dictionary URL is not an HTTPS URL.
342   */
343  // Item (1) above implies item (2).  Spec should be updated.
344  // I take "host name match" to be "is identical to"
345  if (referring_url.host() != dictionary_url.host()) {
346    SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
347    return false;
348  }
349  if (referring_url.SchemeIs("https")) {
350    SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
351    return false;
352  }
353
354  // TODO(jar): Remove this failsafe conservative hack which is more restrictive
355  // than current SDCH spec when needed, and justified by security audit.
356  if (!referring_url.SchemeIs("http")) {
357    SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
358    return false;
359  }
360
361  return true;
362}
363
364bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
365    const GURL& dictionary_url) {
366  std::string client_hash;
367  std::string server_hash;
368  GenerateHash(dictionary_text, &client_hash, &server_hash);
369  if (dictionaries_.find(server_hash) != dictionaries_.end()) {
370    SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
371    return false;  // Already loaded.
372  }
373
374  std::string domain, path;
375  std::set<int> ports;
376  base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
377
378  if (dictionary_text.empty()) {
379    SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
380    return false;  // Missing header.
381  }
382
383  size_t header_end = dictionary_text.find("\n\n");
384  if (std::string::npos == header_end) {
385    SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
386    return false;  // Missing header.
387  }
388  size_t line_start = 0;  // Start of line being parsed.
389  while (1) {
390    size_t line_end = dictionary_text.find('\n', line_start);
391    DCHECK(std::string::npos != line_end);
392    DCHECK(line_end <= header_end);
393
394    size_t colon_index = dictionary_text.find(':', line_start);
395    if (std::string::npos == colon_index) {
396      SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
397      return false;  // Illegal line missing a colon.
398    }
399
400    if (colon_index > line_end)
401      break;
402
403    size_t value_start = dictionary_text.find_first_not_of(" \t",
404                                                           colon_index + 1);
405    if (std::string::npos != value_start) {
406      if (value_start >= line_end)
407        break;
408      std::string name(dictionary_text, line_start, colon_index - line_start);
409      std::string value(dictionary_text, value_start, line_end - value_start);
410      name = StringToLowerASCII(name);
411      if (name == "domain") {
412        domain = value;
413      } else if (name == "path") {
414        path = value;
415      } else if (name == "format-version") {
416        if (value != "1.0")
417          return false;
418      } else if (name == "max-age") {
419        int64 seconds;
420        base::StringToInt64(value, &seconds);
421        expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
422      } else if (name == "port") {
423        int port;
424        base::StringToInt(value, &port);
425        if (port >= 0)
426          ports.insert(port);
427      }
428    }
429
430    if (line_end >= header_end)
431      break;
432    line_start = line_end + 1;
433  }
434
435  if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
436    return false;
437
438  // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
439  // useless dictionaries.  We should probably have a cache eviction plan,
440  // instead of just blocking additions.  For now, with the spec in flux, it
441  // is probably not worth doing eviction handling.
442  if (kMaxDictionarySize < dictionary_text.size()) {
443    SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
444    return false;
445  }
446  if (kMaxDictionaryCount <= dictionaries_.size()) {
447    SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
448    return false;
449  }
450
451  UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
452  DVLOG(1) << "Loaded dictionary with client hash " << client_hash
453           << " and server hash " << server_hash;
454  Dictionary* dictionary =
455      new Dictionary(dictionary_text, header_end + 2, client_hash,
456                     dictionary_url, domain, path, expiration, ports);
457  dictionary->AddRef();
458  dictionaries_[server_hash] = dictionary;
459  return true;
460}
461
462void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
463    const GURL& referring_url, Dictionary** dictionary) {
464  *dictionary = NULL;
465  DictionaryMap::iterator it = dictionaries_.find(server_hash);
466  if (it == dictionaries_.end()) {
467    return;
468  }
469  Dictionary* matching_dictionary = it->second;
470  if (!matching_dictionary->CanUse(referring_url))
471    return;
472  *dictionary = matching_dictionary;
473}
474
475// TODO(jar): If we have evictions from the dictionaries_, then we need to
476// change this interface to return a list of reference counted Dictionary
477// instances that can be used if/when a server specifies one.
478void SdchManager::GetAvailDictionaryList(const GURL& target_url,
479                                         std::string* list) {
480  int count = 0;
481  for (DictionaryMap::iterator it = dictionaries_.begin();
482       it != dictionaries_.end(); ++it) {
483    if (!it->second->CanAdvertise(target_url))
484      continue;
485    ++count;
486    if (!list->empty())
487      list->append(",");
488    list->append(it->second->client_hash());
489  }
490  // Watch to see if we have corrupt or numerous dictionaries.
491  if (count > 0)
492    UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
493}
494
495// static
496void SdchManager::GenerateHash(const std::string& dictionary_text,
497    std::string* client_hash, std::string* server_hash) {
498  char binary_hash[32];
499  crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
500
501  std::string first_48_bits(&binary_hash[0], 6);
502  std::string second_48_bits(&binary_hash[6], 6);
503  UrlSafeBase64Encode(first_48_bits, client_hash);
504  UrlSafeBase64Encode(second_48_bits, server_hash);
505
506  DCHECK_EQ(server_hash->length(), 8u);
507  DCHECK_EQ(client_hash->length(), 8u);
508}
509
510//------------------------------------------------------------------------------
511// Methods for supporting latency experiments.
512
513bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
514  return allow_latency_experiment_.end() !=
515      allow_latency_experiment_.find(url.host());
516}
517
518void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
519  if (enable) {
520    allow_latency_experiment_.insert(url.host());
521    return;
522  }
523  ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
524  if (allow_latency_experiment_.end() == it)
525    return;  // It was already erased, or never allowed.
526  SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
527  allow_latency_experiment_.erase(it);
528}
529
530// static
531void SdchManager::UrlSafeBase64Encode(const std::string& input,
532                                      std::string* output) {
533  // Since this is only done during a dictionary load, and hashes are only 8
534  // characters, we just do the simple fixup, rather than rewriting the encoder.
535  base::Base64Encode(input, output);
536  for (size_t i = 0; i < output->size(); ++i) {
537    switch (output->data()[i]) {
538      case '+':
539        (*output)[i] = '-';
540        continue;
541      case '/':
542        (*output)[i] = '_';
543        continue;
544      default:
545        continue;
546    }
547  }
548}
549
550}  // namespace net
551