1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
28ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen// Use of this source code is governed by a BSD-style license that can be
38ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen// found in the LICENSE file.
48ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
58ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen#include "chrome/common/extensions/url_pattern.h"
68ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
78ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen#include "base/string_piece.h"
88ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen#include "base/string_split.h"
98ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen#include "base/string_util.h"
108ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen#include "chrome/common/url_constants.h"
118ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen#include "googleurl/src/gurl.h"
12731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "googleurl/src/url_util.h"
138ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
14dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char URLPattern::kAllUrlsPattern[] = "<all_urls>";
15dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
16dc0f95d653279beabeb9817299e2902918ba123eKristian Monsennamespace {
17dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
188ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen// TODO(aa): Consider adding chrome-extension? What about more obscure ones
198ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen// like data: and javascript: ?
208ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen// Note: keep this array in sync with kValidSchemeMasks.
21dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kValidSchemes[] = {
228ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  chrome::kHttpScheme,
238ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  chrome::kHttpsScheme,
248ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  chrome::kFileScheme,
258ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  chrome::kFtpScheme,
268ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  chrome::kChromeUIScheme,
27ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  chrome::kFileSystemScheme,
288ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen};
298ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
30dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst int kValidSchemeMasks[] = {
318ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  URLPattern::SCHEME_HTTP,
328ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  URLPattern::SCHEME_HTTPS,
338ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  URLPattern::SCHEME_FILE,
348ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  URLPattern::SCHEME_FTP,
358ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  URLPattern::SCHEME_CHROMEUI,
36ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  URLPattern::SCHEME_FILESYSTEM,
378ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen};
388ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
398ae428e0fb7feea16d79853f29447469a93bedffKristian MonsenCOMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
408ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen               must_keep_these_arrays_in_sync);
418ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
42dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseSuccess = "Success.";
43dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseErrorMissingSchemeSeparator = "Missing scheme separator.";
44dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseErrorInvalidScheme = "Invalid scheme.";
45dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseErrorWrongSchemeType = "Wrong scheme type.";
46dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseErrorEmptyHost = "Host can not be empty.";
47dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseErrorInvalidHostWildcard = "Invalid host wildcard.";
48dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseErrorEmptyPath = "Empty path.";
49dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseErrorHasColon =
50dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    "Ports are not supported in URL patterns. ':' may not be used in a host.";
51dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
52dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Message explaining each URLPattern::ParseResult.
53dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* kParseResultMessages[] = {
54dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseSuccess,
55dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseErrorMissingSchemeSeparator,
56dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseErrorInvalidScheme,
57dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseErrorWrongSchemeType,
58dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseErrorEmptyHost,
59dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseErrorInvalidHostWildcard,
60dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseErrorEmptyPath,
61dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  kParseErrorHasColon
62dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen};
63dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
64dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenCOMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
65dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen               must_add_message_for_each_parse_result);
668ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
67dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char kPathSeparator[] = "/";
68731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
69dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool IsStandardScheme(const std::string& scheme) {
70731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // "*" gets the same treatment as a standard scheme.
71731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (scheme == "*")
72731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    return true;
73731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
74731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  return url_util::IsStandard(scheme.c_str(),
75731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      url_parse::Component(0, static_cast<int>(scheme.length())));
76731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
778ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
78dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}  // namespace
79dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
808ae428e0fb7feea16d79853f29447469a93bedffKristian MonsenURLPattern::URLPattern()
81731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    : valid_schemes_(SCHEME_NONE),
82731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      match_all_urls_(false),
83731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      match_subdomains_(false) {}
848ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
858ae428e0fb7feea16d79853f29447469a93bedffKristian MonsenURLPattern::URLPattern(int valid_schemes)
868ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    : valid_schemes_(valid_schemes), match_all_urls_(false),
878ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      match_subdomains_(false) {}
888ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
898ae428e0fb7feea16d79853f29447469a93bedffKristian MonsenURLPattern::URLPattern(int valid_schemes, const std::string& pattern)
908ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    : valid_schemes_(valid_schemes), match_all_urls_(false),
918ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      match_subdomains_(false) {
92dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
93dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Strict error checking is used, because this constructor is only
94dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // appropriate when we know |pattern| is valid.
95dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT))
968ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    NOTREACHED() << "URLPattern is invalid: " << pattern;
978ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
988ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
998ae428e0fb7feea16d79853f29447469a93bedffKristian MonsenURLPattern::~URLPattern() {
1008ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
1018ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
102dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenURLPattern::ParseResult URLPattern::Parse(const std::string& pattern,
103dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                                          ParseOption strictness) {
104dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  CHECK(strictness == PARSE_LENIENT ||
105dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen        strictness == PARSE_STRICT);
106dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
1078ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // Special case pattern to match every valid URL.
1088ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (pattern == kAllUrlsPattern) {
1098ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    match_all_urls_ = true;
1108ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    match_subdomains_ = true;
1118ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    scheme_ = "*";
1128ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    host_.clear();
113dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    SetPath("/*");
114513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    return PARSE_SUCCESS;
115513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  }
116513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch
117513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  // Parse out the scheme.
118513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  size_t scheme_end_pos = pattern.find(chrome::kStandardSchemeSeparator);
119513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  bool has_standard_scheme_separator = true;
120513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch
121513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  // Some urls also use ':' alone as the scheme separator.
122513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  if (scheme_end_pos == std::string::npos) {
123513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    scheme_end_pos = pattern.find(':');
124513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    has_standard_scheme_separator = false;
1258ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  }
1268ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1278ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (scheme_end_pos == std::string::npos)
128513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
1298ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1308ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!SetScheme(pattern.substr(0, scheme_end_pos)))
131513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    return PARSE_ERROR_INVALID_SCHEME;
1328ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
133513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  bool standard_scheme = IsStandardScheme(scheme_);
134513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  if (standard_scheme != has_standard_scheme_separator)
135513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
136731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
137731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Advance past the scheme separator.
138513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  scheme_end_pos +=
139513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch      (standard_scheme ? strlen(chrome::kStandardSchemeSeparator) : 1);
140513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  if (scheme_end_pos >= pattern.size())
141513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    return PARSE_ERROR_EMPTY_HOST;
1428ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1438ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // Parse out the host and path.
144513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  size_t host_start_pos = scheme_end_pos;
1458ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  size_t path_start_pos = 0;
1468ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
147731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // File URLs are special because they have no host.
148731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (scheme_ == chrome::kFileScheme || !standard_scheme) {
1498ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    path_start_pos = host_start_pos;
1508ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  } else {
1518ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
152513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch
153513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    // Host is required.
154513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch    if (host_start_pos == host_end_pos)
155513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch      return PARSE_ERROR_EMPTY_HOST;
156513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch
1578ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    if (host_end_pos == std::string::npos)
158513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch      return PARSE_ERROR_EMPTY_PATH;
1598ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1608ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
1618ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1628ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    // The first component can optionally be '*' to match all subdomains.
1638ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    std::vector<std::string> host_components;
164731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    base::SplitString(host_, '.', &host_components);
1658ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    if (host_components[0] == "*") {
1668ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      match_subdomains_ = true;
1678ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      host_components.erase(host_components.begin(),
1688ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen                            host_components.begin() + 1);
1698ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    }
1708ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    host_ = JoinString(host_components, '.');
1718ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1728ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    // No other '*' can occur in the host, though. This isn't necessary, but is
1738ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    // done as a convenience to developers who might otherwise be confused and
1748ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    // think '*' works as a glob in the host.
1758ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    if (host_.find('*') != std::string::npos)
176513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch      return PARSE_ERROR_INVALID_HOST_WILDCARD;
1778ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1788ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    path_start_pos = host_end_pos;
1798ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  }
1808ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
181dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SetPath(pattern.substr(path_start_pos));
182dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
183dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos)
184dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return PARSE_ERROR_HAS_COLON;
1858ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
186513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  return PARSE_SUCCESS;
1878ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
1888ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1898ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::SetScheme(const std::string& scheme) {
1908ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  scheme_ = scheme;
1918ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (scheme_ == "*") {
1928ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
1938ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  } else if (!IsValidScheme(scheme_)) {
1948ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
1958ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  }
1968ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return true;
1978ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
1988ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
1998ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::IsValidScheme(const std::string& scheme) const {
200731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (valid_schemes_ == SCHEME_ALL)
201731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    return true;
202731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
2038ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
2048ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
2058ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      return true;
2068ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  }
2078ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2088ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return false;
2098ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
2108ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
211dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid URLPattern::SetPath(const std::string& path) {
212dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  path_ = path;
213dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  path_escaped_ = path_;
214dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
215dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
216dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
217dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
2188ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::MatchesUrl(const GURL &test) const {
2198ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!MatchesScheme(test.scheme()))
2208ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2218ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
222731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (match_all_urls_)
223731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    return true;
224731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
2258ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!MatchesHost(test))
2268ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2278ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2288ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!MatchesPath(test.PathForRequest()))
2298ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2308ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2318ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return true;
2328ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
2338ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2348ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::MatchesScheme(const std::string& test) const {
235ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (!IsValidScheme(test))
236ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return false;
2378ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
238ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return scheme_ == "*" || test == scheme_;
2398ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
2408ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2418ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::MatchesHost(const std::string& host) const {
2428ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  std::string test(chrome::kHttpScheme);
2438ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  test += chrome::kStandardSchemeSeparator;
2448ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  test += host;
2458ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  test += "/";
2468ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return MatchesHost(GURL(test));
2478ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
2488ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2498ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::MatchesHost(const GURL& test) const {
2508ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // If the hosts are exactly equal, we have a match.
2518ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (test.host() == host_)
2528ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return true;
2538ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2548ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // If we're matching subdomains, and we have no host in the match pattern,
2558ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // that means that we're matching all hosts, which means we have a match no
2568ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // matter what the test host is.
2578ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (match_subdomains_ && host_.empty())
2588ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return true;
2598ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2608ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // Otherwise, we can only match if our match pattern matches subdomains.
2618ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!match_subdomains_)
2628ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2638ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2648ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // We don't do subdomain matching against IP addresses, so we can give up now
2658ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // if the test host is an IP address.
2668ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (test.HostIsIPAddress())
2678ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2688ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2698ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // Check if the test host is a subdomain of our host.
2708ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (test.host().length() <= (host_.length() + 1))
2718ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2728ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2738ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (test.host().compare(test.host().length() - host_.length(),
2748ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen                          host_.length(), host_) != 0)
2758ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2768ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2778ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return test.host()[test.host().length() - host_.length() - 1] == '.';
2788ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
2798ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2808ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::MatchesPath(const std::string& test) const {
2818ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!MatchPattern(test, path_escaped_))
2828ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
2838ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2848ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return true;
2858ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
2868ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
2878ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenstd::string URLPattern::GetAsString() const {
2888ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (match_all_urls_)
2898ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return kAllUrlsPattern;
2908ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
291731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  bool standard_scheme = IsStandardScheme(scheme_);
292731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
293731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  std::string spec = scheme_ +
294731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      (standard_scheme ? chrome::kStandardSchemeSeparator : ":");
2958ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
296731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (scheme_ != chrome::kFileScheme && standard_scheme) {
2978ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    if (match_subdomains_) {
2988ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      spec += "*";
2998ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      if (!host_.empty())
3008ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen        spec += ".";
3018ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    }
3028ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3038ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    if (!host_.empty())
3048ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      spec += host_;
3058ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  }
3068ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3078ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!path_.empty())
3088ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    spec += path_;
3098ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3108ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return spec;
3118ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
3128ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3138ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenbool URLPattern::OverlapsWith(const URLPattern& other) const {
3148ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!MatchesScheme(other.scheme_) && !other.MatchesScheme(scheme_))
3158ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
3168ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3178ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!MatchesHost(other.host()) && !other.MatchesHost(host_))
3188ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
3198ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3208ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // We currently only use OverlapsWith() for the patterns inside
3218ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // ExtensionExtent. In those cases, we know that the path will have only a
3228ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // single wildcard at the end. This makes figuring out overlap much easier. It
3238ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // seems like there is probably a computer-sciency way to solve the general
3248ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  // case, but we don't need that yet.
3258ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  DCHECK(path_.find('*') == path_.size() - 1);
3268ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  DCHECK(other.path().find('*') == other.path().size() - 1);
3278ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3288ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) &&
3298ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      !other.MatchesPath(path_.substr(0, path_.size() - 1)))
3308ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return false;
3318ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3328ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return true;
3338ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
3348ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3358ae428e0fb7feea16d79853f29447469a93bedffKristian Monsenstd::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
3368ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  std::vector<URLPattern> result;
3378ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
338ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
3398ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    result.push_back(*this);
3408ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    return result;
3418ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  }
3428ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3438ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
3448ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    if (MatchesScheme(kValidSchemes[i])) {
3458ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      URLPattern temp = *this;
3468ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      temp.SetScheme(kValidSchemes[i]);
3478ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      temp.set_match_all_urls(false);
3488ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen      result.push_back(temp);
3498ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen    }
3508ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  }
3518ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen
3528ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen  return result;
3538ae428e0fb7feea16d79853f29447469a93bedffKristian Monsen}
354dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
355dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// static
356dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst char* URLPattern::GetParseResultString(
357dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    URLPattern::ParseResult parse_result) {
358dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return kParseResultMessages[parse_result];
359dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
360