content_settings_pattern.cc revision dc0f95d653279beabeb9817299e2902918ba123e
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/content_settings/content_settings_pattern.h"
6
7#include "base/string_util.h"
8#include "chrome/common/url_constants.h"
9#include "net/base/net_util.h"
10#include "googleurl/src/gurl.h"
11#include "googleurl/src/url_canon.h"
12
13namespace {
14bool IsValidHostlessPattern(const std::string& pattern) {
15  std::string file_scheme_plus_separator(chrome::kFileScheme);
16  file_scheme_plus_separator += chrome::kStandardSchemeSeparator;
17
18  return StartsWithASCII(pattern, file_scheme_plus_separator, false);
19}
20}  // namespace
21
22// The version of the pattern format implemented. Version 1 includes the
23// following patterns:
24//   - [*.]domain.tld (matches domain.tld and all sub-domains)
25//   - host (matches an exact hostname)
26//   - a.b.c.d (matches an exact IPv4 ip)
27//   - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
28//   - file:///tmp/test.html (a complete URL without a host)
29// Version 2 adds a resource identifier for plugins.
30// TODO(jochen): update once this feature is no longer behind a flag.
31const int ContentSettingsPattern::kContentSettingsPatternVersion = 1;
32const char* ContentSettingsPattern::kDomainWildcard = "[*.]";
33const size_t ContentSettingsPattern::kDomainWildcardLength = 4;
34
35// static
36ContentSettingsPattern ContentSettingsPattern::FromURL(
37    const GURL& url) {
38  return ContentSettingsPattern(!url.has_host() || url.HostIsIPAddress() ?
39      net::GetHostOrSpecFromURL(url) :
40      std::string(kDomainWildcard) + url.host());
41}
42
43// static
44ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard(
45    const GURL& url) {
46  return ContentSettingsPattern(net::GetHostOrSpecFromURL(url));
47}
48
49bool ContentSettingsPattern::IsValid() const {
50  if (pattern_.empty())
51    return false;
52
53  if (IsValidHostlessPattern(pattern_))
54    return true;
55
56  const std::string host(pattern_.length() > kDomainWildcardLength &&
57                         StartsWithASCII(pattern_, kDomainWildcard, false) ?
58                         pattern_.substr(kDomainWildcardLength) :
59                         pattern_);
60  url_canon::CanonHostInfo host_info;
61  return host.find('*') == std::string::npos &&
62         !net::CanonicalizeHost(host, &host_info).empty();
63}
64
65bool ContentSettingsPattern::Matches(const GURL& url) const {
66  if (!IsValid())
67    return false;
68
69  const std::string host(net::GetHostOrSpecFromURL(url));
70  if (pattern_.length() < kDomainWildcardLength ||
71      !StartsWithASCII(pattern_, kDomainWildcard, false))
72    return pattern_ == host;
73
74  const size_t match =
75      host.rfind(pattern_.substr(kDomainWildcardLength));
76
77  return (match != std::string::npos) &&
78         (match == 0 || host[match - 1] == '.') &&
79         (match + pattern_.length() - kDomainWildcardLength == host.length());
80}
81
82std::string ContentSettingsPattern::CanonicalizePattern() const {
83  if (!IsValid())
84    return "";
85
86  if (IsValidHostlessPattern(pattern_))
87    return GURL(pattern_).spec();
88
89  bool starts_with_wildcard = pattern_.length() > kDomainWildcardLength &&
90      StartsWithASCII(pattern_, kDomainWildcard, false);
91
92  const std::string host(starts_with_wildcard ?
93      pattern_.substr(kDomainWildcardLength) : pattern_);
94
95  std::string canonicalized_pattern =
96      starts_with_wildcard ? kDomainWildcard : "";
97
98  url_canon::CanonHostInfo host_info;
99  canonicalized_pattern += net::CanonicalizeHost(host, &host_info);
100
101  return canonicalized_pattern;
102}
103