1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/content_settings/content_settings_pattern.h"
6
7#include "base/string_util.h"
8#include "chrome/common/url_constants.h"
9#include "net/base/net_util.h"
10#include "googleurl/src/gurl.h"
11#include "googleurl/src/url_canon.h"
12
13namespace {
14
15bool IsValidHostlessPattern(const std::string& pattern) {
16  std::string file_scheme_plus_separator(chrome::kFileScheme);
17  file_scheme_plus_separator += chrome::kStandardSchemeSeparator;
18
19  return StartsWithASCII(pattern, file_scheme_plus_separator, false);
20}
21
22}  // namespace
23
24// The version of the pattern format implemented. Version 1 includes the
25// following patterns:
26//   - [*.]domain.tld (matches domain.tld and all sub-domains)
27//   - host (matches an exact hostname)
28//   - a.b.c.d (matches an exact IPv4 ip)
29//   - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
30//   - file:///tmp/test.html (a complete URL without a host)
31// Version 2 adds a resource identifier for plugins.
32// TODO(jochen): update once this feature is no longer behind a flag.
33const int ContentSettingsPattern::kContentSettingsPatternVersion = 1;
34const char* ContentSettingsPattern::kDomainWildcard = "[*.]";
35const size_t ContentSettingsPattern::kDomainWildcardLength = 4;
36
37// static
38ContentSettingsPattern ContentSettingsPattern::FromURL(
39    const GURL& url) {
40  // TODO(markusheintz): Add scheme wildcard;
41  return ContentSettingsPattern(!url.has_host() || url.HostIsIPAddress() ?
42      net::GetHostOrSpecFromURL(url) :
43      std::string(kDomainWildcard) + url.host());
44}
45
46// static
47ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard(
48    const GURL& url) {
49  return ContentSettingsPattern(net::GetHostOrSpecFromURL(url), url.scheme());
50}
51
52bool ContentSettingsPattern::IsValid() const {
53  if (pattern_.empty())
54    return false;
55
56  if (IsValidHostlessPattern(pattern_))
57    return true;
58
59  const std::string host(pattern_.length() > kDomainWildcardLength &&
60                         StartsWithASCII(pattern_, kDomainWildcard, false) ?
61                         pattern_.substr(kDomainWildcardLength) :
62                         pattern_);
63  url_canon::CanonHostInfo host_info;
64  return host.find('*') == std::string::npos &&
65         !net::CanonicalizeHost(host, &host_info).empty();
66}
67
68bool ContentSettingsPattern::Matches(const GURL& url) const {
69  if (!IsValid())
70    return false;
71
72  const std::string host(net::GetHostOrSpecFromURL(url));
73  if (pattern_.length() < kDomainWildcardLength ||
74      !StartsWithASCII(pattern_, kDomainWildcard, false))
75    return pattern_ == host;
76
77  const size_t match =
78      host.rfind(pattern_.substr(kDomainWildcardLength));
79
80  return (match != std::string::npos) &&
81         (match == 0 || host[match - 1] == '.') &&
82         (match + pattern_.length() - kDomainWildcardLength == host.length());
83}
84
85std::string ContentSettingsPattern::CanonicalizePattern() const {
86  if (!IsValid())
87    return "";
88
89  if (IsValidHostlessPattern(pattern_))
90    return GURL(pattern_).spec();
91
92  bool starts_with_wildcard = pattern_.length() > kDomainWildcardLength &&
93      StartsWithASCII(pattern_, kDomainWildcard, false);
94
95  const std::string host(starts_with_wildcard ?
96      pattern_.substr(kDomainWildcardLength) : pattern_);
97
98  std::string canonicalized_pattern =
99      starts_with_wildcard ? kDomainWildcard : "";
100
101  url_canon::CanonHostInfo host_info;
102  canonicalized_pattern += net::CanonicalizeHost(host, &host_info);
103
104  return canonicalized_pattern;
105}
106