url_pattern.cc revision a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "extensions/common/url_pattern.h" 6 7#include "base/strings/string_number_conversions.h" 8#include "base/strings/string_piece.h" 9#include "base/strings/string_split.h" 10#include "base/strings/string_util.h" 11#include "content/public/common/url_constants.h" 12#include "extensions/common/constants.h" 13#include "url/gurl.h" 14#include "url/url_util.h" 15 16const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; 17 18namespace { 19 20// TODO(aa): What about more obscure schemes like data: and javascript: ? 21// Note: keep this array in sync with kValidSchemeMasks. 22const char* kValidSchemes[] = { 23 content::kHttpScheme, 24 content::kHttpsScheme, 25 chrome::kFileScheme, 26 content::kFtpScheme, 27 chrome::kChromeUIScheme, 28 extensions::kExtensionScheme, 29 chrome::kFileSystemScheme, 30}; 31 32const int kValidSchemeMasks[] = { 33 URLPattern::SCHEME_HTTP, 34 URLPattern::SCHEME_HTTPS, 35 URLPattern::SCHEME_FILE, 36 URLPattern::SCHEME_FTP, 37 URLPattern::SCHEME_CHROMEUI, 38 URLPattern::SCHEME_EXTENSION, 39 URLPattern::SCHEME_FILESYSTEM, 40}; 41 42COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), 43 must_keep_these_arrays_in_sync); 44 45const char kParseSuccess[] = "Success."; 46const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; 47const char kParseErrorInvalidScheme[] = "Invalid scheme."; 48const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; 49const char kParseErrorEmptyHost[] = "Host can not be empty."; 50const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; 51const char kParseErrorEmptyPath[] = "Empty path."; 52const char kParseErrorInvalidPort[] = "Invalid port."; 53 54// Message explaining each URLPattern::ParseResult. 55const char* const kParseResultMessages[] = { 56 kParseSuccess, 57 kParseErrorMissingSchemeSeparator, 58 kParseErrorInvalidScheme, 59 kParseErrorWrongSchemeType, 60 kParseErrorEmptyHost, 61 kParseErrorInvalidHostWildcard, 62 kParseErrorEmptyPath, 63 kParseErrorInvalidPort, 64}; 65 66COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), 67 must_add_message_for_each_parse_result); 68 69const char kPathSeparator[] = "/"; 70 71bool IsStandardScheme(const std::string& scheme) { 72 // "*" gets the same treatment as a standard scheme. 73 if (scheme == "*") 74 return true; 75 76 return url_util::IsStandard(scheme.c_str(), 77 url_parse::Component(0, static_cast<int>(scheme.length()))); 78} 79 80bool IsValidPortForScheme(const std::string& scheme, const std::string& port) { 81 if (port == "*") 82 return true; 83 84 // Only accept non-wildcard ports if the scheme uses ports. 85 if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) == 86 url_parse::PORT_UNSPECIFIED) { 87 return false; 88 } 89 90 int parsed_port = url_parse::PORT_UNSPECIFIED; 91 if (!base::StringToInt(port, &parsed_port)) 92 return false; 93 return (parsed_port >= 0) && (parsed_port < 65536); 94} 95 96// Returns |path| with the trailing wildcard stripped if one existed. 97// 98// The functions that rely on this (OverlapsWith and Contains) are only 99// called for the patterns inside URLPatternSet. In those cases, we know that 100// the path will have only a single wildcard at the end. This makes figuring 101// out overlap much easier. It seems like there is probably a computer-sciency 102// way to solve the general case, but we don't need that yet. 103std::string StripTrailingWildcard(const std::string& path) { 104 size_t wildcard_index = path.find('*'); 105 size_t path_last = path.size() - 1; 106 DCHECK(wildcard_index == std::string::npos || wildcard_index == path_last); 107 return wildcard_index == path_last ? path.substr(0, path_last) : path; 108} 109 110} // namespace 111 112URLPattern::URLPattern() 113 : valid_schemes_(SCHEME_NONE), 114 match_all_urls_(false), 115 match_subdomains_(false), 116 port_("*") {} 117 118URLPattern::URLPattern(int valid_schemes) 119 : valid_schemes_(valid_schemes), 120 match_all_urls_(false), 121 match_subdomains_(false), 122 port_("*") {} 123 124URLPattern::URLPattern(int valid_schemes, const std::string& pattern) 125 // Strict error checking is used, because this constructor is only 126 // appropriate when we know |pattern| is valid. 127 : valid_schemes_(valid_schemes), 128 match_all_urls_(false), 129 match_subdomains_(false), 130 port_("*") { 131 if (PARSE_SUCCESS != Parse(pattern)) 132 NOTREACHED() << "URLPattern is invalid: " << pattern; 133} 134 135URLPattern::~URLPattern() { 136} 137 138bool URLPattern::operator<(const URLPattern& other) const { 139 return GetAsString() < other.GetAsString(); 140} 141 142bool URLPattern::operator>(const URLPattern& other) const { 143 return GetAsString() > other.GetAsString(); 144} 145 146bool URLPattern::operator==(const URLPattern& other) const { 147 return GetAsString() == other.GetAsString(); 148} 149 150URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) { 151 spec_.clear(); 152 SetMatchAllURLs(false); 153 SetMatchSubdomains(false); 154 SetPort("*"); 155 156 // Special case pattern to match every valid URL. 157 if (pattern == kAllUrlsPattern) { 158 SetMatchAllURLs(true); 159 return PARSE_SUCCESS; 160 } 161 162 // Parse out the scheme. 163 size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator); 164 bool has_standard_scheme_separator = true; 165 166 // Some urls also use ':' alone as the scheme separator. 167 if (scheme_end_pos == std::string::npos) { 168 scheme_end_pos = pattern.find(':'); 169 has_standard_scheme_separator = false; 170 } 171 172 if (scheme_end_pos == std::string::npos) 173 return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; 174 175 if (!SetScheme(pattern.substr(0, scheme_end_pos))) 176 return PARSE_ERROR_INVALID_SCHEME; 177 178 bool standard_scheme = IsStandardScheme(scheme_); 179 if (standard_scheme != has_standard_scheme_separator) 180 return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; 181 182 // Advance past the scheme separator. 183 scheme_end_pos += 184 (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1); 185 if (scheme_end_pos >= pattern.size()) 186 return PARSE_ERROR_EMPTY_HOST; 187 188 // Parse out the host and path. 189 size_t host_start_pos = scheme_end_pos; 190 size_t path_start_pos = 0; 191 192 if (!standard_scheme) { 193 path_start_pos = host_start_pos; 194 } else if (scheme_ == chrome::kFileScheme) { 195 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); 196 if (host_end_pos == std::string::npos) { 197 // Allow hostname omission. 198 // e.g. file://* is interpreted as file:///*, 199 // file://foo* is interpreted as file:///foo*. 200 path_start_pos = host_start_pos - 1; 201 } else { 202 // Ignore hostname if scheme is file://. 203 // e.g. file://localhost/foo is equal to file:///foo. 204 path_start_pos = host_end_pos; 205 } 206 } else { 207 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); 208 209 // Host is required. 210 if (host_start_pos == host_end_pos) 211 return PARSE_ERROR_EMPTY_HOST; 212 213 if (host_end_pos == std::string::npos) 214 return PARSE_ERROR_EMPTY_PATH; 215 216 host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); 217 218 // The first component can optionally be '*' to match all subdomains. 219 std::vector<std::string> host_components; 220 base::SplitString(host_, '.', &host_components); 221 if (host_components[0] == "*") { 222 match_subdomains_ = true; 223 host_components.erase(host_components.begin(), 224 host_components.begin() + 1); 225 } 226 host_ = JoinString(host_components, '.'); 227 228 path_start_pos = host_end_pos; 229 } 230 231 SetPath(pattern.substr(path_start_pos)); 232 233 size_t port_pos = host_.find(':'); 234 if (port_pos != std::string::npos) { 235 if (!SetPort(host_.substr(port_pos + 1))) 236 return PARSE_ERROR_INVALID_PORT; 237 host_ = host_.substr(0, port_pos); 238 } 239 240 // No other '*' can occur in the host, though. This isn't necessary, but is 241 // done as a convenience to developers who might otherwise be confused and 242 // think '*' works as a glob in the host. 243 if (host_.find('*') != std::string::npos) 244 return PARSE_ERROR_INVALID_HOST_WILDCARD; 245 246 return PARSE_SUCCESS; 247} 248 249void URLPattern::SetValidSchemes(int valid_schemes) { 250 spec_.clear(); 251 valid_schemes_ = valid_schemes; 252} 253 254void URLPattern::SetHost(const std::string& host) { 255 spec_.clear(); 256 host_ = host; 257} 258 259void URLPattern::SetMatchAllURLs(bool val) { 260 spec_.clear(); 261 match_all_urls_ = val; 262 263 if (val) { 264 match_subdomains_ = true; 265 scheme_ = "*"; 266 host_.clear(); 267 SetPath("/*"); 268 } 269} 270 271void URLPattern::SetMatchSubdomains(bool val) { 272 spec_.clear(); 273 match_subdomains_ = val; 274} 275 276bool URLPattern::SetScheme(const std::string& scheme) { 277 spec_.clear(); 278 scheme_ = scheme; 279 if (scheme_ == "*") { 280 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); 281 } else if (!IsValidScheme(scheme_)) { 282 return false; 283 } 284 return true; 285} 286 287bool URLPattern::IsValidScheme(const std::string& scheme) const { 288 if (valid_schemes_ == SCHEME_ALL) 289 return true; 290 291 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 292 if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) 293 return true; 294 } 295 296 return false; 297} 298 299void URLPattern::SetPath(const std::string& path) { 300 spec_.clear(); 301 path_ = path; 302 path_escaped_ = path_; 303 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); 304 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); 305} 306 307bool URLPattern::SetPort(const std::string& port) { 308 spec_.clear(); 309 if (IsValidPortForScheme(scheme_, port)) { 310 port_ = port; 311 return true; 312 } 313 return false; 314} 315 316bool URLPattern::MatchesURL(const GURL& test) const { 317 const GURL* test_url = &test; 318 bool has_inner_url = test.inner_url() != NULL; 319 320 if (has_inner_url) { 321 if (!test.SchemeIsFileSystem()) 322 return false; // The only nested URLs we handle are filesystem URLs. 323 test_url = test.inner_url(); 324 } 325 326 if (!MatchesScheme(test_url->scheme())) 327 return false; 328 329 if (match_all_urls_) 330 return true; 331 332 std::string path_for_request = test.PathForRequest(); 333 if (has_inner_url) 334 path_for_request = test_url->path() + path_for_request; 335 336 return MatchesSecurityOriginHelper(*test_url) && 337 MatchesPath(path_for_request); 338} 339 340bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { 341 const GURL* test_url = &test; 342 bool has_inner_url = test.inner_url() != NULL; 343 344 if (has_inner_url) { 345 if (!test.SchemeIsFileSystem()) 346 return false; // The only nested URLs we handle are filesystem URLs. 347 test_url = test.inner_url(); 348 } 349 350 if (!MatchesScheme(test_url->scheme())) 351 return false; 352 353 if (match_all_urls_) 354 return true; 355 356 return MatchesSecurityOriginHelper(*test_url); 357} 358 359bool URLPattern::MatchesScheme(const std::string& test) const { 360 if (!IsValidScheme(test)) 361 return false; 362 363 return scheme_ == "*" || test == scheme_; 364} 365 366bool URLPattern::MatchesHost(const std::string& host) const { 367 std::string test(content::kHttpScheme); 368 test += content::kStandardSchemeSeparator; 369 test += host; 370 test += "/"; 371 return MatchesHost(GURL(test)); 372} 373 374bool URLPattern::MatchesHost(const GURL& test) const { 375 // If the hosts are exactly equal, we have a match. 376 if (test.host() == host_) 377 return true; 378 379 // If we're matching subdomains, and we have no host in the match pattern, 380 // that means that we're matching all hosts, which means we have a match no 381 // matter what the test host is. 382 if (match_subdomains_ && host_.empty()) 383 return true; 384 385 // Otherwise, we can only match if our match pattern matches subdomains. 386 if (!match_subdomains_) 387 return false; 388 389 // We don't do subdomain matching against IP addresses, so we can give up now 390 // if the test host is an IP address. 391 if (test.HostIsIPAddress()) 392 return false; 393 394 // Check if the test host is a subdomain of our host. 395 if (test.host().length() <= (host_.length() + 1)) 396 return false; 397 398 if (test.host().compare(test.host().length() - host_.length(), 399 host_.length(), host_) != 0) 400 return false; 401 402 return test.host()[test.host().length() - host_.length() - 1] == '.'; 403} 404 405bool URLPattern::MatchesPath(const std::string& test) const { 406 // Make the behaviour of OverlapsWith consistent with MatchesURL, which is 407 // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'. 408 if (test + "/*" == path_escaped_) 409 return true; 410 411 return MatchPattern(test, path_escaped_); 412} 413 414const std::string& URLPattern::GetAsString() const { 415 if (!spec_.empty()) 416 return spec_; 417 418 if (match_all_urls_) { 419 spec_ = kAllUrlsPattern; 420 return spec_; 421 } 422 423 bool standard_scheme = IsStandardScheme(scheme_); 424 425 std::string spec = scheme_ + 426 (standard_scheme ? content::kStandardSchemeSeparator : ":"); 427 428 if (scheme_ != chrome::kFileScheme && standard_scheme) { 429 if (match_subdomains_) { 430 spec += "*"; 431 if (!host_.empty()) 432 spec += "."; 433 } 434 435 if (!host_.empty()) 436 spec += host_; 437 438 if (port_ != "*") { 439 spec += ":"; 440 spec += port_; 441 } 442 } 443 444 if (!path_.empty()) 445 spec += path_; 446 447 spec_ = spec; 448 return spec_; 449} 450 451bool URLPattern::OverlapsWith(const URLPattern& other) const { 452 if (match_all_urls() || other.match_all_urls()) 453 return true; 454 return (MatchesAnyScheme(other.GetExplicitSchemes()) || 455 other.MatchesAnyScheme(GetExplicitSchemes())) 456 && (MatchesHost(other.host()) || other.MatchesHost(host())) 457 && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port())) 458 && (MatchesPath(StripTrailingWildcard(other.path())) || 459 other.MatchesPath(StripTrailingWildcard(path()))); 460} 461 462bool URLPattern::Contains(const URLPattern& other) const { 463 if (match_all_urls()) 464 return true; 465 return MatchesAllSchemes(other.GetExplicitSchemes()) 466 && MatchesHost(other.host()) 467 && MatchesPortPattern(other.port()) 468 && MatchesPath(StripTrailingWildcard(other.path())); 469} 470 471bool URLPattern::MatchesAnyScheme( 472 const std::vector<std::string>& schemes) const { 473 for (std::vector<std::string>::const_iterator i = schemes.begin(); 474 i != schemes.end(); ++i) { 475 if (MatchesScheme(*i)) 476 return true; 477 } 478 479 return false; 480} 481 482bool URLPattern::MatchesAllSchemes( 483 const std::vector<std::string>& schemes) const { 484 for (std::vector<std::string>::const_iterator i = schemes.begin(); 485 i != schemes.end(); ++i) { 486 if (!MatchesScheme(*i)) 487 return false; 488 } 489 490 return true; 491} 492 493bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { 494 // Ignore hostname if scheme is file://. 495 if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) 496 return false; 497 498 if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort()))) 499 return false; 500 501 return true; 502} 503 504bool URLPattern::MatchesPortPattern(const std::string& port) const { 505 return port_ == "*" || port_ == port; 506} 507 508std::vector<std::string> URLPattern::GetExplicitSchemes() const { 509 std::vector<std::string> result; 510 511 if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { 512 result.push_back(scheme_); 513 return result; 514 } 515 516 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 517 if (MatchesScheme(kValidSchemes[i])) { 518 result.push_back(kValidSchemes[i]); 519 } 520 } 521 522 return result; 523} 524 525std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { 526 std::vector<std::string> explicit_schemes = GetExplicitSchemes(); 527 std::vector<URLPattern> result; 528 529 for (std::vector<std::string>::const_iterator i = explicit_schemes.begin(); 530 i != explicit_schemes.end(); ++i) { 531 URLPattern temp = *this; 532 temp.SetScheme(*i); 533 temp.SetMatchAllURLs(false); 534 result.push_back(temp); 535 } 536 537 return result; 538} 539 540// static 541const char* URLPattern::GetParseResultString( 542 URLPattern::ParseResult parse_result) { 543 return kParseResultMessages[parse_result]; 544} 545