// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "net/base/sdch_manager.h"
6
7#include "base/base64.h"
8#include "base/logging.h"
9#include "base/metrics/histogram.h"
10#include "base/strings/string_number_conversions.h"
11#include "base/strings/string_util.h"
12#include "crypto/sha2.h"
13#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14#include "net/url_request/url_request_http_job.h"
15
16namespace {
17
18void StripTrailingDot(GURL* gurl) {
19  std::string host(gurl->host());
20
21  if (host.empty())
22    return;
23
24  if (*host.rbegin() != '.')
25    return;
26
27  host.resize(host.size() - 1);
28
29  GURL::Replacements replacements;
30  replacements.SetHostStr(host);
31  *gurl = gurl->ReplaceComponents(replacements);
32  return;
33}
34
35}  // namespace
36
37namespace net {
38
39//------------------------------------------------------------------------------
40// static
41
42// Adjust SDCH limits downwards for mobile.
43#if defined(OS_ANDROID) || defined(OS_IOS)
44// static
45const size_t SdchManager::kMaxDictionaryCount = 1;
46const size_t SdchManager::kMaxDictionarySize = 500 * 1000;
47#else
48// static
49const size_t SdchManager::kMaxDictionaryCount = 20;
50const size_t SdchManager::kMaxDictionarySize = 1000 * 1000;
51#endif
52
53// static
54#if defined(OS_IOS)
55// Workaround for http://crbug.com/418975; remove when fixed.
56bool SdchManager::g_sdch_enabled_ = false;
57#else
58bool SdchManager::g_sdch_enabled_ = true;
59#endif
60
61// static
62bool SdchManager::g_secure_scheme_supported_ = true;
63
64//------------------------------------------------------------------------------
65SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
66                                    size_t offset,
67                                    const std::string& client_hash,
68                                    const GURL& gurl,
69                                    const std::string& domain,
70                                    const std::string& path,
71                                    const base::Time& expiration,
72                                    const std::set<int>& ports)
73    : text_(dictionary_text, offset),
74      client_hash_(client_hash),
75      url_(gurl),
76      domain_(domain),
77      path_(path),
78      expiration_(expiration),
79      ports_(ports) {
80}
81
82SdchManager::Dictionary::~Dictionary() {
83}
84
85bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
86  /* The specific rules of when a dictionary should be advertised in an
87     Avail-Dictionary header are modeled after the rules for cookie scoping. The
88     terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
89     dictionary may be advertised in the Avail-Dictionaries header exactly when
90     all of the following are true:
91      1. The server's effective host name domain-matches the Domain attribute of
92         the dictionary.
93      2. If the dictionary has a Port attribute, the request port is one of the
94         ports listed in the Port attribute.
95      3. The request URI path-matches the path header of the dictionary.
96      4. The request is not an HTTPS request.
97     We can override (ignore) item (4) only when we have explicitly enabled
98     HTTPS support AND the dictionary acquisition scheme matches the target
99     url scheme.
100    */
101  if (!DomainMatch(target_url, domain_))
102    return false;
103  if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
104    return false;
105  if (path_.size() && !PathMatch(target_url.path(), path_))
106    return false;
107  if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
108    return false;
109  if (target_url.SchemeIsSecure() != url_.SchemeIsSecure())
110    return false;
111  if (base::Time::Now() > expiration_)
112    return false;
113  return true;
114}
115
116//------------------------------------------------------------------------------
117// Security functions restricting loads and use of dictionaries.
118
119// static
120bool SdchManager::Dictionary::CanSet(const std::string& domain,
121                                     const std::string& path,
122                                     const std::set<int>& ports,
123                                     const GURL& dictionary_url) {
124  /*
125  A dictionary is invalid and must not be stored if any of the following are
126  true:
127    1. The dictionary has no Domain attribute.
128    2. The effective host name that derives from the referer URL host name does
129      not domain-match the Domain attribute.
130    3. The Domain attribute is a top level domain.
131    4. The referer URL host is a host domain name (not IP address) and has the
132      form HD, where D is the value of the Domain attribute, and H is a string
133      that contains one or more dots.
134    5. If the dictionary has a Port attribute and the referer URL's port was not
135      in the list.
136  */
137
138  // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
139  // and hence the conservative approach is to not allow any redirects (if there
140  // were any... then don't allow the dictionary to be set).
141
142  if (domain.empty()) {
143    SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
144    return false;  // Domain is required.
145  }
146  if (registry_controlled_domains::GetDomainAndRegistry(
147        domain,
148        registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
149    SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
150    return false;  // domain was a TLD.
151  }
152  if (!Dictionary::DomainMatch(dictionary_url, domain)) {
153    SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
154    return false;
155  }
156
157  std::string referrer_url_host = dictionary_url.host();
158  size_t postfix_domain_index = referrer_url_host.rfind(domain);
159  // See if it is indeed a postfix, or just an internal string.
160  if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
161    // It is a postfix... so check to see if there's a dot in the prefix.
162    size_t end_of_host_index = referrer_url_host.find_first_of('.');
163    if (referrer_url_host.npos != end_of_host_index  &&
164        end_of_host_index < postfix_domain_index) {
165      SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
166      return false;
167    }
168  }
169
170  if (!ports.empty()
171      && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
172    SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
173    return false;
174  }
175  return true;
176}
177
178// static
179bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
180  /*
181    1. The request URL's host name domain-matches the Domain attribute of the
182      dictionary.
183    2. If the dictionary has a Port attribute, the request port is one of the
184      ports listed in the Port attribute.
185    3. The request URL path-matches the path attribute of the dictionary.
186    4. The request is not an HTTPS request.
187    We can override (ignore) item (4) only when we have explicitly enabled
188    HTTPS support AND the dictionary acquisition scheme matches the target
189     url scheme.
190  */
191  if (!DomainMatch(referring_url, domain_)) {
192    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
193    return false;
194  }
195  if (!ports_.empty()
196      && 0 == ports_.count(referring_url.EffectiveIntPort())) {
197    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
198    return false;
199  }
200  if (path_.size() && !PathMatch(referring_url.path(), path_)) {
201    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
202    return false;
203  }
204  if (!SdchManager::secure_scheme_supported() &&
205      referring_url.SchemeIsSecure()) {
206    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
207    return false;
208  }
209  if (referring_url.SchemeIsSecure() != url_.SchemeIsSecure()) {
210    SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
211    return false;
212  }
213
214  // TODO(jar): Remove overly restrictive failsafe test (added per security
215  // review) when we have a need to be more general.
216  if (!referring_url.SchemeIsHTTPOrHTTPS()) {
217    SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
218    return false;
219  }
220
221  return true;
222}
223
224bool SdchManager::Dictionary::PathMatch(const std::string& path,
225                                        const std::string& restriction) {
226  /*  Must be either:
227  1. P2 is equal to P1
228  2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
229      character following P2 in P1 is "/".
230      */
231  if (path == restriction)
232    return true;
233  size_t prefix_length = restriction.size();
234  if (prefix_length > path.size())
235    return false;  // Can't be a prefix.
236  if (0 != path.compare(0, prefix_length, restriction))
237    return false;
238  return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
239}
240
241// static
242bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
243                                          const std::string& restriction) {
244  // TODO(jar): This is not precisely a domain match definition.
245  return gurl.DomainIs(restriction.data(), restriction.size());
246}
247
248//------------------------------------------------------------------------------
249SdchManager::SdchManager()
250    : fetches_count_for_testing_(0) {
251  DCHECK(CalledOnValidThread());
252}
253
254SdchManager::~SdchManager() {
255  DCHECK(CalledOnValidThread());
256  while (!dictionaries_.empty()) {
257    DictionaryMap::iterator it = dictionaries_.begin();
258    dictionaries_.erase(it->first);
259  }
260}
261
262void SdchManager::ClearData() {
263  blacklisted_domains_.clear();
264  allow_latency_experiment_.clear();
265  if (fetcher_.get())
266    fetcher_->Cancel();
267
268  // Note that this may result in not having dictionaries we've advertised
269  // for incoming responses.  The window is relatively small (as ClearData()
270  // is not expected to be called frequently), so we rely on meta-refresh
271  // to handle this case.
272  dictionaries_.clear();
273}
274
275// static
276void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
277  UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
278}
279
280void SdchManager::set_sdch_fetcher(scoped_ptr<SdchFetcher> fetcher) {
281  DCHECK(CalledOnValidThread());
282  fetcher_ = fetcher.Pass();
283}
284
285// static
286void SdchManager::EnableSdchSupport(bool enabled) {
287  g_sdch_enabled_ = enabled;
288}
289
290// static
291void SdchManager::EnableSecureSchemeSupport(bool enabled) {
292  g_secure_scheme_supported_ = enabled;
293}
294
295void SdchManager::BlacklistDomain(const GURL& url,
296                                  ProblemCodes blacklist_reason) {
297  SetAllowLatencyExperiment(url, false);
298
299  BlacklistInfo* blacklist_info =
300      &blacklisted_domains_[base::StringToLowerASCII(url.host())];
301
302  if (blacklist_info->count > 0)
303    return;  // Domain is already blacklisted.
304
305  if (blacklist_info->exponential_count > (INT_MAX - 1) / 2) {
306    blacklist_info->exponential_count = INT_MAX;
307  } else {
308    blacklist_info->exponential_count =
309        blacklist_info->exponential_count * 2 + 1;
310  }
311
312  blacklist_info->count = blacklist_info->exponential_count;
313  blacklist_info->reason = blacklist_reason;
314}
315
316void SdchManager::BlacklistDomainForever(const GURL& url,
317                                         ProblemCodes blacklist_reason) {
318  SetAllowLatencyExperiment(url, false);
319
320  BlacklistInfo* blacklist_info =
321      &blacklisted_domains_[base::StringToLowerASCII(url.host())];
322  blacklist_info->count = INT_MAX;
323  blacklist_info->exponential_count = INT_MAX;
324  blacklist_info->reason = blacklist_reason;
325}
326
327void SdchManager::ClearBlacklistings() {
328  blacklisted_domains_.clear();
329}
330
331void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
332  BlacklistInfo* blacklist_info = &blacklisted_domains_[
333      base::StringToLowerASCII(domain)];
334  blacklist_info->count = 0;
335  blacklist_info->reason = MIN_PROBLEM_CODE;
336}
337
338int SdchManager::BlackListDomainCount(const std::string& domain) {
339  std::string domain_lower(base::StringToLowerASCII(domain));
340
341  if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
342    return 0;
343  return blacklisted_domains_[domain_lower].count;
344}
345
346int SdchManager::BlacklistDomainExponential(const std::string& domain) {
347  std::string domain_lower(base::StringToLowerASCII(domain));
348
349  if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower))
350    return 0;
351  return blacklisted_domains_[domain_lower].exponential_count;
352}
353
354bool SdchManager::IsInSupportedDomain(const GURL& url) {
355  DCHECK(CalledOnValidThread());
356  if (!g_sdch_enabled_ )
357    return false;
358
359  if (!secure_scheme_supported() && url.SchemeIsSecure())
360    return false;
361
362  if (blacklisted_domains_.empty())
363    return true;
364
365  DomainBlacklistInfo::iterator it =
366      blacklisted_domains_.find(base::StringToLowerASCII(url.host()));
367  if (blacklisted_domains_.end() == it || it->second.count == 0)
368    return true;
369
370  UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it->second.reason,
371                            MAX_PROBLEM_CODE);
372  SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
373
374  int count = it->second.count - 1;
375  if (count > 0) {
376    it->second.count = count;
377  } else {
378    it->second.count = 0;
379    it->second.reason = MIN_PROBLEM_CODE;
380  }
381
382  return false;
383}
384
385void SdchManager::FetchDictionary(const GURL& request_url,
386                                  const GURL& dictionary_url) {
387  DCHECK(CalledOnValidThread());
388  if (CanFetchDictionary(request_url, dictionary_url) && fetcher_.get()) {
389    ++fetches_count_for_testing_;
390    fetcher_->Schedule(dictionary_url);
391  }
392}
393
394bool SdchManager::CanFetchDictionary(const GURL& referring_url,
395                                     const GURL& dictionary_url) const {
396  DCHECK(CalledOnValidThread());
397  /* The user agent may retrieve a dictionary from the dictionary URL if all of
398     the following are true:
399       1 The dictionary URL host name matches the referrer URL host name and
400           scheme.
401       2 The dictionary URL host name domain matches the parent domain of the
402           referrer URL host name
403       3 The parent domain of the referrer URL host name is not a top level
404           domain
405       4 The dictionary URL is not an HTTPS URL.
406   */
407  // Item (1) above implies item (2).  Spec should be updated.
408  // I take "host name match" to be "is identical to"
409  if (referring_url.host() != dictionary_url.host() ||
410      referring_url.scheme() != dictionary_url.scheme()) {
411    SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
412    return false;
413  }
414  if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
415    SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
416    return false;
417  }
418
419  // TODO(jar): Remove this failsafe conservative hack which is more restrictive
420  // than current SDCH spec when needed, and justified by security audit.
421  if (!referring_url.SchemeIsHTTPOrHTTPS()) {
422    SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
423    return false;
424  }
425
426  return true;
427}
428
429void SdchManager::GetVcdiffDictionary(
430    const std::string& server_hash,
431    const GURL& referring_url,
432    scoped_refptr<Dictionary>* dictionary) {
433  DCHECK(CalledOnValidThread());
434  *dictionary = NULL;
435  DictionaryMap::iterator it = dictionaries_.find(server_hash);
436  if (it == dictionaries_.end()) {
437    return;
438  }
439  scoped_refptr<Dictionary> matching_dictionary = it->second;
440  if (!IsInSupportedDomain(referring_url))
441    return;
442  if (!matching_dictionary->CanUse(referring_url))
443    return;
444  *dictionary = matching_dictionary;
445}
446
447// TODO(jar): If we have evictions from the dictionaries_, then we need to
448// change this interface to return a list of reference counted Dictionary
449// instances that can be used if/when a server specifies one.
450void SdchManager::GetAvailDictionaryList(const GURL& target_url,
451                                         std::string* list) {
452  DCHECK(CalledOnValidThread());
453  int count = 0;
454  for (DictionaryMap::iterator it = dictionaries_.begin();
455       it != dictionaries_.end(); ++it) {
456    if (!IsInSupportedDomain(target_url))
457      continue;
458    if (!it->second->CanAdvertise(target_url))
459      continue;
460    ++count;
461    if (!list->empty())
462      list->append(",");
463    list->append(it->second->client_hash());
464  }
465  // Watch to see if we have corrupt or numerous dictionaries.
466  if (count > 0)
467    UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
468}
469
470// static
471void SdchManager::GenerateHash(const std::string& dictionary_text,
472    std::string* client_hash, std::string* server_hash) {
473  char binary_hash[32];
474  crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
475
476  std::string first_48_bits(&binary_hash[0], 6);
477  std::string second_48_bits(&binary_hash[6], 6);
478  UrlSafeBase64Encode(first_48_bits, client_hash);
479  UrlSafeBase64Encode(second_48_bits, server_hash);
480
481  DCHECK_EQ(server_hash->length(), 8u);
482  DCHECK_EQ(client_hash->length(), 8u);
483}
484
485//------------------------------------------------------------------------------
486// Methods for supporting latency experiments.
487
488bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
489  DCHECK(CalledOnValidThread());
490  return allow_latency_experiment_.end() !=
491      allow_latency_experiment_.find(url.host());
492}
493
494void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
495  DCHECK(CalledOnValidThread());
496  if (enable) {
497    allow_latency_experiment_.insert(url.host());
498    return;
499  }
500  ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
501  if (allow_latency_experiment_.end() == it)
502    return;  // It was already erased, or never allowed.
503  SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
504  allow_latency_experiment_.erase(it);
505}
506
507void SdchManager::AddSdchDictionary(const std::string& dictionary_text,
508    const GURL& dictionary_url) {
509  DCHECK(CalledOnValidThread());
510  std::string client_hash;
511  std::string server_hash;
512  GenerateHash(dictionary_text, &client_hash, &server_hash);
513  if (dictionaries_.find(server_hash) != dictionaries_.end()) {
514    SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
515    return;                             // Already loaded.
516  }
517
518  std::string domain, path;
519  std::set<int> ports;
520  base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
521
522  if (dictionary_text.empty()) {
523    SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
524    return;                             // Missing header.
525  }
526
527  size_t header_end = dictionary_text.find("\n\n");
528  if (std::string::npos == header_end) {
529    SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
530    return;                             // Missing header.
531  }
532  size_t line_start = 0;  // Start of line being parsed.
533  while (1) {
534    size_t line_end = dictionary_text.find('\n', line_start);
535    DCHECK(std::string::npos != line_end);
536    DCHECK_LE(line_end, header_end);
537
538    size_t colon_index = dictionary_text.find(':', line_start);
539    if (std::string::npos == colon_index) {
540      SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
541      return;                         // Illegal line missing a colon.
542    }
543
544    if (colon_index > line_end)
545      break;
546
547    size_t value_start = dictionary_text.find_first_not_of(" \t",
548                                                           colon_index + 1);
549    if (std::string::npos != value_start) {
550      if (value_start >= line_end)
551        break;
552      std::string name(dictionary_text, line_start, colon_index - line_start);
553      std::string value(dictionary_text, value_start, line_end - value_start);
554      name = base::StringToLowerASCII(name);
555      if (name == "domain") {
556        domain = value;
557      } else if (name == "path") {
558        path = value;
559      } else if (name == "format-version") {
560        if (value != "1.0")
561          return;
562      } else if (name == "max-age") {
563        int64 seconds;
564        base::StringToInt64(value, &seconds);
565        expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
566      } else if (name == "port") {
567        int port;
568        base::StringToInt(value, &port);
569        if (port >= 0)
570          ports.insert(port);
571      }
572    }
573
574    if (line_end >= header_end)
575      break;
576    line_start = line_end + 1;
577  }
578
579  // Narrow fix for http://crbug.com/389451.
580  GURL dictionary_url_normalized(dictionary_url);
581  StripTrailingDot(&dictionary_url_normalized);
582
583  if (!IsInSupportedDomain(dictionary_url_normalized))
584    return;
585
586  if (!Dictionary::CanSet(domain, path, ports, dictionary_url_normalized))
587    return;
588
589  // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
590  // useless dictionaries.  We should probably have a cache eviction plan,
591  // instead of just blocking additions.  For now, with the spec in flux, it
592  // is probably not worth doing eviction handling.
593  if (kMaxDictionarySize < dictionary_text.size()) {
594    SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
595    return;
596  }
597  if (kMaxDictionaryCount <= dictionaries_.size()) {
598    SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
599    return;
600  }
601
602  UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
603  DVLOG(1) << "Loaded dictionary with client hash " << client_hash
604           << " and server hash " << server_hash;
605  Dictionary* dictionary =
606      new Dictionary(dictionary_text, header_end + 2, client_hash,
607                     dictionary_url_normalized, domain,
608                     path, expiration, ports);
609  dictionaries_[server_hash] = dictionary;
610  return;
611}
612
613// static
614void SdchManager::UrlSafeBase64Encode(const std::string& input,
615                                      std::string* output) {
616  // Since this is only done during a dictionary load, and hashes are only 8
617  // characters, we just do the simple fixup, rather than rewriting the encoder.
618  base::Base64Encode(input, output);
619  std::replace(output->begin(), output->end(), '+', '-');
620  std::replace(output->begin(), output->end(), '/', '_');
621}
622
623}  // namespace net
624