content_settings_pattern.cc revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/common/content_settings_pattern.h" 6 7#include <vector> 8 9#include "base/memory/scoped_ptr.h" 10#include "base/string_util.h" 11#include "base/strings/string_split.h" 12#include "chrome/common/content_settings_pattern_parser.h" 13#include "chrome/common/render_messages.h" 14#include "chrome/common/url_constants.h" 15#include "extensions/common/constants.h" 16#include "googleurl/src/gurl.h" 17#include "googleurl/src/url_canon.h" 18#include "ipc/ipc_message_utils.h" 19#include "net/base/dns_util.h" 20#include "net/base/net_util.h" 21 22namespace { 23 24std::string GetDefaultPort(const std::string& scheme) { 25 if (scheme == chrome::kHttpScheme) 26 return "80"; 27 if (scheme == chrome::kHttpsScheme) 28 return "443"; 29 return ""; 30} 31 32// Returns true if |sub_domain| is a sub domain or equls |domain|. E.g. 33// "mail.google.com" is a sub domain of "google.com" but "evilhost.com" is not a 34// subdomain of "host.com". 35bool IsSubDomainOrEqual(const std::string& sub_domain, 36 const std::string& domain) { 37 // The empty string serves as wildcard. Each domain is a subdomain of the 38 // wildcard. 39 if (domain.empty()) 40 return true; 41 const size_t match = sub_domain.rfind(domain); 42 if (match == std::string::npos || 43 (match > 0 && sub_domain[match - 1] != '.') || 44 (match + domain.length() != sub_domain.length())) { 45 return false; 46 } 47 return true; 48} 49 50// Compares two domain names. 51int CompareDomainNames(const std::string& str1, const std::string& str2) { 52 std::vector<std::string> domain_name1; 53 std::vector<std::string> domain_name2; 54 55 base::SplitString(str1, '.', &domain_name1); 56 base::SplitString(str2, '.', &domain_name2); 57 58 int i1 = domain_name1.size() - 1; 59 int i2 = domain_name2.size() - 1; 60 int rv; 61 while (i1 >= 0 && i2 >= 0) { 62 // domain names are stored in puny code. So it's fine to use the compare 63 // method. 64 rv = domain_name1[i1].compare(domain_name2[i2]); 65 if (rv != 0) 66 return rv; 67 --i1; 68 --i2; 69 } 70 71 if (i1 > i2) 72 return 1; 73 74 if (i1 < i2) 75 return -1; 76 77 // The domain names are identical. 78 return 0; 79} 80 81typedef ContentSettingsPattern::BuilderInterface BuilderInterface; 82 83} // namespace 84 85// //////////////////////////////////////////////////////////////////////////// 86// ContentSettingsPattern::Builder 87// 88ContentSettingsPattern::Builder::Builder(bool use_legacy_validate) 89 : is_valid_(true), 90 use_legacy_validate_(use_legacy_validate) {} 91 92ContentSettingsPattern::Builder::~Builder() {} 93 94BuilderInterface* ContentSettingsPattern::Builder::WithPort( 95 const std::string& port) { 96 parts_.port = port; 97 parts_.is_port_wildcard = false; 98 return this; 99} 100 101BuilderInterface* ContentSettingsPattern::Builder::WithPortWildcard() { 102 parts_.port = ""; 103 parts_.is_port_wildcard = true; 104 return this; 105} 106 107BuilderInterface* ContentSettingsPattern::Builder::WithHost( 108 const std::string& host) { 109 parts_.host = host; 110 return this; 111} 112 113BuilderInterface* ContentSettingsPattern::Builder::WithDomainWildcard() { 114 parts_.has_domain_wildcard = true; 115 return this; 116} 117 118BuilderInterface* ContentSettingsPattern::Builder::WithScheme( 119 const std::string& scheme) { 120 parts_.scheme = scheme; 121 parts_.is_scheme_wildcard = false; 122 return this; 123} 124 125BuilderInterface* ContentSettingsPattern::Builder::WithSchemeWildcard() { 126 parts_.scheme = ""; 127 parts_.is_scheme_wildcard = true; 128 return this; 129} 130 131BuilderInterface* ContentSettingsPattern::Builder::WithPath( 132 const std::string& path) { 133 parts_.path = path; 134 parts_.is_path_wildcard = false; 135 return this; 136} 137 138BuilderInterface* ContentSettingsPattern::Builder::WithPathWildcard() { 139 parts_.path = ""; 140 parts_.is_path_wildcard = true; 141 return this; 142} 143 144BuilderInterface* ContentSettingsPattern::Builder::Invalid() { 145 is_valid_ = false; 146 return this; 147} 148 149ContentSettingsPattern ContentSettingsPattern::Builder::Build() { 150 if (!is_valid_) 151 return ContentSettingsPattern(); 152 if (!Canonicalize(&parts_)) 153 return ContentSettingsPattern(); 154 if (use_legacy_validate_) { 155 is_valid_ = LegacyValidate(parts_); 156 } else { 157 is_valid_ = Validate(parts_); 158 } 159 return ContentSettingsPattern(parts_, is_valid_); 160} 161 162// static 163bool ContentSettingsPattern::Builder::Canonicalize(PatternParts* parts) { 164 // Canonicalize the scheme part. 165 const std::string scheme(StringToLowerASCII(parts->scheme)); 166 parts->scheme = scheme; 167 168 if (parts->scheme == std::string(chrome::kFileScheme) && 169 !parts->is_path_wildcard) { 170 GURL url(std::string(chrome::kFileScheme) + 171 std::string(content::kStandardSchemeSeparator) + parts->path); 172 parts->path = url.path(); 173 } 174 175 // Canonicalize the host part. 176 const std::string host(parts->host); 177 url_canon::CanonHostInfo host_info; 178 std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); 179 if (host_info.IsIPAddress() && parts->has_domain_wildcard) 180 return false; 181 canonicalized_host = net::TrimEndingDot(canonicalized_host); 182 183 parts->host = ""; 184 if ((host.find('*') == std::string::npos) && 185 !canonicalized_host.empty()) { 186 // Valid host. 187 parts->host += canonicalized_host; 188 } 189 return true; 190} 191 192// static 193bool ContentSettingsPattern::Builder::Validate(const PatternParts& parts) { 194 // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}. 195 if ((parts.is_scheme_wildcard && !parts.scheme.empty()) || 196 (parts.is_port_wildcard && !parts.port.empty())) { 197 NOTREACHED(); 198 return false; 199 } 200 201 // file:// URL patterns have an empty host and port. 202 if (parts.scheme == std::string(chrome::kFileScheme)) { 203 if (parts.has_domain_wildcard || !parts.host.empty() || !parts.port.empty()) 204 return false; 205 if (parts.is_path_wildcard) 206 return parts.path.empty(); 207 return (!parts.path.empty() && 208 parts.path != "/" && 209 parts.path.find("*") == std::string::npos); 210 } 211 212 // If the pattern is for an extension URL test if it is valid. 213 if (parts.scheme == std::string(extensions::kExtensionScheme) && 214 parts.port.empty() && 215 !parts.is_port_wildcard) { 216 return true; 217 } 218 219 // Non-file patterns are invalid if either the scheme, host or port part is 220 // empty. 221 if ((parts.scheme.empty() && !parts.is_scheme_wildcard) || 222 (parts.host.empty() && !parts.has_domain_wildcard) || 223 (parts.port.empty() && !parts.is_port_wildcard)) { 224 return false; 225 } 226 227 if (parts.host.find("*") != std::string::npos) 228 return false; 229 230 // Test if the scheme is supported or a wildcard. 231 if (!parts.is_scheme_wildcard && 232 parts.scheme != std::string(chrome::kHttpScheme) && 233 parts.scheme != std::string(chrome::kHttpsScheme)) { 234 return false; 235 } 236 return true; 237} 238 239// static 240bool ContentSettingsPattern::Builder::LegacyValidate( 241 const PatternParts& parts) { 242 // If the pattern is for a "file-pattern" test if it is valid. 243 if (parts.scheme == std::string(chrome::kFileScheme) && 244 !parts.is_scheme_wildcard && 245 parts.host.empty() && 246 parts.port.empty()) 247 return true; 248 249 // If the pattern is for an extension URL test if it is valid. 250 if (parts.scheme == std::string(extensions::kExtensionScheme) && 251 !parts.is_scheme_wildcard && 252 !parts.host.empty() && 253 !parts.has_domain_wildcard && 254 parts.port.empty() && 255 !parts.is_port_wildcard) 256 return true; 257 258 // Non-file patterns are invalid if either the scheme, host or port part is 259 // empty. 260 if ((!parts.is_scheme_wildcard) || 261 (parts.host.empty() && !parts.has_domain_wildcard) || 262 (!parts.is_port_wildcard)) 263 return false; 264 265 // Test if the scheme is supported or a wildcard. 266 if (!parts.is_scheme_wildcard && 267 parts.scheme != std::string(chrome::kHttpScheme) && 268 parts.scheme != std::string(chrome::kHttpsScheme)) { 269 return false; 270 } 271 return true; 272} 273 274// //////////////////////////////////////////////////////////////////////////// 275// ContentSettingsPattern::PatternParts 276// 277ContentSettingsPattern::PatternParts::PatternParts() 278 : is_scheme_wildcard(false), 279 has_domain_wildcard(false), 280 is_port_wildcard(false), 281 is_path_wildcard(false) {} 282 283ContentSettingsPattern::PatternParts::~PatternParts() {} 284 285// //////////////////////////////////////////////////////////////////////////// 286// ContentSettingsPattern 287// 288 289// The version of the pattern format implemented. Version 1 includes the 290// following patterns: 291// - [*.]domain.tld (matches domain.tld and all sub-domains) 292// - host (matches an exact hostname) 293// - a.b.c.d (matches an exact IPv4 ip) 294// - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip) 295// - file:///tmp/test.html (a complete URL without a host) 296// Version 2 adds a resource identifier for plugins. 297// TODO(jochen): update once this feature is no longer behind a flag. 298const int ContentSettingsPattern::kContentSettingsPatternVersion = 1; 299 300// TODO(markusheintz): These two constants were moved to the Pattern Parser. 301// Remove once the dependency of the ContentSettingsBaseProvider is removed. 302const char* ContentSettingsPattern::kDomainWildcard = "[*.]"; 303const size_t ContentSettingsPattern::kDomainWildcardLength = 4; 304 305// static 306BuilderInterface* ContentSettingsPattern::CreateBuilder( 307 bool validate) { 308 return new Builder(validate); 309} 310 311// static 312ContentSettingsPattern ContentSettingsPattern::FromURL( 313 const GURL& url) { 314 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 315 ContentSettingsPattern::CreateBuilder(false)); 316 317 const GURL* local_url = &url; 318 if (url.SchemeIsFileSystem() && url.inner_url()) { 319 local_url = url.inner_url(); 320 } 321 if (local_url->SchemeIsFile()) { 322 builder->WithScheme(local_url->scheme())->WithPath(local_url->path()); 323 } else { 324 // Please keep the order of the ifs below as URLs with an IP as host can 325 // also have a "http" scheme. 326 if (local_url->HostIsIPAddress()) { 327 builder->WithScheme(local_url->scheme())->WithHost(local_url->host()); 328 } else if (local_url->SchemeIs(chrome::kHttpScheme)) { 329 builder->WithSchemeWildcard()->WithDomainWildcard()->WithHost( 330 local_url->host()); 331 } else if (local_url->SchemeIs(chrome::kHttpsScheme)) { 332 builder->WithScheme(local_url->scheme())->WithDomainWildcard()->WithHost( 333 local_url->host()); 334 } else { 335 // Unsupported scheme 336 } 337 if (local_url->port().empty()) { 338 if (local_url->SchemeIs(chrome::kHttpsScheme)) 339 builder->WithPort(GetDefaultPort(chrome::kHttpsScheme)); 340 else 341 builder->WithPortWildcard(); 342 } else { 343 builder->WithPort(local_url->port()); 344 } 345 } 346 return builder->Build(); 347} 348 349// static 350ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard( 351 const GURL& url) { 352 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 353 ContentSettingsPattern::CreateBuilder(false)); 354 355 const GURL* local_url = &url; 356 if (url.SchemeIsFileSystem() && url.inner_url()) { 357 local_url = url.inner_url(); 358 } 359 if (local_url->SchemeIsFile()) { 360 builder->WithScheme(local_url->scheme())->WithPath(local_url->path()); 361 } else { 362 builder->WithScheme(local_url->scheme())->WithHost(local_url->host()); 363 if (local_url->port().empty()) { 364 builder->WithPort(GetDefaultPort(local_url->scheme())); 365 } else { 366 builder->WithPort(local_url->port()); 367 } 368 } 369 return builder->Build(); 370} 371 372// static 373ContentSettingsPattern ContentSettingsPattern::FromString( 374 const std::string& pattern_spec) { 375 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 376 ContentSettingsPattern::CreateBuilder(false)); 377 content_settings::PatternParser::Parse(pattern_spec, builder.get()); 378 return builder->Build(); 379} 380 381// static 382ContentSettingsPattern ContentSettingsPattern::LegacyFromString( 383 const std::string& pattern_spec) { 384 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 385 ContentSettingsPattern::CreateBuilder(true)); 386 content_settings::PatternParser::Parse(pattern_spec, builder.get()); 387 return builder->Build(); 388} 389 390// static 391ContentSettingsPattern ContentSettingsPattern::Wildcard() { 392 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 393 ContentSettingsPattern::CreateBuilder(true)); 394 builder->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()-> 395 WithPathWildcard(); 396 return builder->Build(); 397} 398 399ContentSettingsPattern::ContentSettingsPattern() 400 : is_valid_(false) { 401} 402 403ContentSettingsPattern::ContentSettingsPattern( 404 const PatternParts& parts, 405 bool valid) 406 : parts_(parts), 407 is_valid_(valid) { 408} 409 410void ContentSettingsPattern::WriteToMessage(IPC::Message* m) const { 411 IPC::WriteParam(m, is_valid_); 412 IPC::WriteParam(m, parts_); 413} 414 415bool ContentSettingsPattern::ReadFromMessage(const IPC::Message* m, 416 PickleIterator* iter) { 417 return IPC::ReadParam(m, iter, &is_valid_) && 418 IPC::ReadParam(m, iter, &parts_); 419} 420 421bool ContentSettingsPattern::Matches( 422 const GURL& url) const { 423 // An invalid pattern matches nothing. 424 if (!is_valid_) 425 return false; 426 427 const GURL* local_url = &url; 428 if (url.SchemeIsFileSystem() && url.inner_url()) { 429 local_url = url.inner_url(); 430 } 431 432 // Match the scheme part. 433 const std::string scheme(local_url->scheme()); 434 if (!parts_.is_scheme_wildcard && 435 parts_.scheme != scheme) { 436 return false; 437 } 438 439 // File URLs have no host. Matches if the pattern has the path wildcard set, 440 // or if the path in the URL is identical to the one in the pattern. 441 // For filesystem:file URLs, the path used is the filesystem type, so all 442 // filesystem:file:///temporary/... are equivalent. 443 // TODO(markusheintz): Content settings should be defined for all files on 444 // a machine. Unless there is a good use case for supporting paths for file 445 // patterns, stop supporting path for file patterns. 446 if (!parts_.is_scheme_wildcard && scheme == chrome::kFileScheme) 447 return parts_.is_path_wildcard || 448 parts_.path == std::string(local_url->path()); 449 450 // Match the host part. 451 const std::string host(net::TrimEndingDot(local_url->host())); 452 if (!parts_.has_domain_wildcard) { 453 if (parts_.host != host) 454 return false; 455 } else { 456 if (!IsSubDomainOrEqual(host, parts_.host)) 457 return false; 458 } 459 460 // For chrome extensions URLs ignore the port. 461 if (parts_.scheme == std::string(extensions::kExtensionScheme)) 462 return true; 463 464 // Match the port part. 465 std::string port(local_url->port()); 466 467 // Use the default port if the port string is empty. GURL returns an empty 468 // string if no port at all was specified or if the default port was 469 // specified. 470 if (port.empty()) { 471 port = GetDefaultPort(scheme); 472 } 473 474 if (!parts_.is_port_wildcard && 475 parts_.port != port ) { 476 return false; 477 } 478 479 return true; 480} 481 482bool ContentSettingsPattern::MatchesAllHosts() const { 483 return parts_.has_domain_wildcard && parts_.host.empty(); 484} 485 486const std::string ContentSettingsPattern::ToString() const { 487 if (IsValid()) 488 return content_settings::PatternParser::ToString(parts_); 489 else 490 return ""; 491} 492 493ContentSettingsPattern::Relation ContentSettingsPattern::Compare( 494 const ContentSettingsPattern& other) const { 495 // Two invalid patterns are identical in the way they behave. They don't match 496 // anything and are represented as an empty string. So it's fair to treat them 497 // as identical. 498 if ((this == &other) || 499 (!is_valid_ && !other.is_valid_)) 500 return IDENTITY; 501 502 if (!is_valid_ && other.is_valid_) 503 return DISJOINT_ORDER_POST; 504 if (is_valid_ && !other.is_valid_) 505 return DISJOINT_ORDER_PRE; 506 507 // If either host, port or scheme are disjoint return immediately. 508 Relation host_relation = CompareHost(parts_, other.parts_); 509 if (host_relation == DISJOINT_ORDER_PRE || 510 host_relation == DISJOINT_ORDER_POST) 511 return host_relation; 512 513 Relation port_relation = ComparePort(parts_, other.parts_); 514 if (port_relation == DISJOINT_ORDER_PRE || 515 port_relation == DISJOINT_ORDER_POST) 516 return port_relation; 517 518 Relation scheme_relation = CompareScheme(parts_, other.parts_); 519 if (scheme_relation == DISJOINT_ORDER_PRE || 520 scheme_relation == DISJOINT_ORDER_POST) 521 return scheme_relation; 522 523 if (host_relation != IDENTITY) 524 return host_relation; 525 if (port_relation != IDENTITY) 526 return port_relation; 527 return scheme_relation; 528} 529 530bool ContentSettingsPattern::operator==( 531 const ContentSettingsPattern& other) const { 532 return Compare(other) == IDENTITY; 533} 534 535bool ContentSettingsPattern::operator!=( 536 const ContentSettingsPattern& other) const { 537 return !(*this == other); 538} 539 540bool ContentSettingsPattern::operator<( 541 const ContentSettingsPattern& other) const { 542 return Compare(other) < 0; 543} 544 545bool ContentSettingsPattern::operator>( 546 const ContentSettingsPattern& other) const { 547 return Compare(other) > 0; 548} 549 550// static 551ContentSettingsPattern::Relation ContentSettingsPattern::CompareHost( 552 const ContentSettingsPattern::PatternParts& parts, 553 const ContentSettingsPattern::PatternParts& other_parts) { 554 if (!parts.has_domain_wildcard && !other_parts.has_domain_wildcard) { 555 // Case 1: No host starts with a wild card 556 int result = CompareDomainNames(parts.host, other_parts.host); 557 if (result == 0) 558 return ContentSettingsPattern::IDENTITY; 559 if (result < 0) 560 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 561 return ContentSettingsPattern::DISJOINT_ORDER_POST; 562 } else if (parts.has_domain_wildcard && !other_parts.has_domain_wildcard) { 563 // Case 2: |host| starts with a domain wildcard and |other_host| does not 564 // start with a domain wildcard. 565 // Examples: 566 // "this" host: [*.]google.com 567 // "other" host: google.com 568 // 569 // [*.]google.com 570 // mail.google.com 571 // 572 // [*.]mail.google.com 573 // google.com 574 // 575 // [*.]youtube.com 576 // google.de 577 // 578 // [*.]youtube.com 579 // mail.google.com 580 // 581 // * 582 // google.de 583 if (IsSubDomainOrEqual(other_parts.host, parts.host)) { 584 return ContentSettingsPattern::SUCCESSOR; 585 } else { 586 if (CompareDomainNames(parts.host, other_parts.host) < 0) 587 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 588 return ContentSettingsPattern::DISJOINT_ORDER_POST; 589 } 590 } else if (!parts.has_domain_wildcard && other_parts.has_domain_wildcard) { 591 // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts 592 // with a domain wildcard. 593 if (IsSubDomainOrEqual(parts.host, other_parts.host)) { 594 return ContentSettingsPattern::PREDECESSOR; 595 } else { 596 if (CompareDomainNames(parts.host, other_parts.host) < 0) 597 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 598 return ContentSettingsPattern::DISJOINT_ORDER_POST; 599 } 600 } else if (parts.has_domain_wildcard && other_parts.has_domain_wildcard) { 601 // Case 4: |host| and |other_host| both start with a domain wildcard. 602 // Examples: 603 // [*.]google.com 604 // [*.]google.com 605 // 606 // [*.]google.com 607 // [*.]mail.google.com 608 // 609 // [*.]youtube.com 610 // [*.]google.de 611 // 612 // [*.]youtube.com 613 // [*.]mail.google.com 614 // 615 // [*.]youtube.com 616 // * 617 // 618 // * 619 // [*.]youtube.com 620 if (parts.host == other_parts.host) { 621 return ContentSettingsPattern::IDENTITY; 622 } else if (IsSubDomainOrEqual(other_parts.host, parts.host)) { 623 return ContentSettingsPattern::SUCCESSOR; 624 } else if (IsSubDomainOrEqual(parts.host, other_parts.host)) { 625 return ContentSettingsPattern::PREDECESSOR; 626 } else { 627 if (CompareDomainNames(parts.host, other_parts.host) < 0) 628 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 629 return ContentSettingsPattern::DISJOINT_ORDER_POST; 630 } 631 } 632 633 NOTREACHED(); 634 return ContentSettingsPattern::IDENTITY; 635} 636 637// static 638ContentSettingsPattern::Relation ContentSettingsPattern::CompareScheme( 639 const ContentSettingsPattern::PatternParts& parts, 640 const ContentSettingsPattern::PatternParts& other_parts) { 641 if (parts.is_scheme_wildcard && !other_parts.is_scheme_wildcard) 642 return ContentSettingsPattern::SUCCESSOR; 643 if (!parts.is_scheme_wildcard && other_parts.is_scheme_wildcard) 644 return ContentSettingsPattern::PREDECESSOR; 645 646 int result = parts.scheme.compare(other_parts.scheme); 647 if (result == 0) 648 return ContentSettingsPattern::IDENTITY; 649 if (result > 0) 650 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 651 return ContentSettingsPattern::DISJOINT_ORDER_POST; 652} 653 654// static 655ContentSettingsPattern::Relation ContentSettingsPattern::ComparePort( 656 const ContentSettingsPattern::PatternParts& parts, 657 const ContentSettingsPattern::PatternParts& other_parts) { 658 if (parts.is_port_wildcard && !other_parts.is_port_wildcard) 659 return ContentSettingsPattern::SUCCESSOR; 660 if (!parts.is_port_wildcard && other_parts.is_port_wildcard) 661 return ContentSettingsPattern::PREDECESSOR; 662 663 int result = parts.port.compare(other_parts.port); 664 if (result == 0) 665 return ContentSettingsPattern::IDENTITY; 666 if (result > 0) 667 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 668 return ContentSettingsPattern::DISJOINT_ORDER_POST; 669} 670