content_settings_pattern.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/common/content_settings_pattern.h" 6 7#include <vector> 8 9#include "base/memory/scoped_ptr.h" 10#include "base/string_split.h" 11#include "base/string_util.h" 12#include "chrome/common/content_settings_pattern_parser.h" 13#include "chrome/common/render_messages.h" 14#include "chrome/common/url_constants.h" 15#include "googleurl/src/gurl.h" 16#include "googleurl/src/url_canon.h" 17#include "ipc/ipc_message_utils.h" 18#include "net/base/dns_util.h" 19#include "net/base/net_util.h" 20 21namespace { 22 23std::string GetDefaultPort(const std::string& scheme) { 24 if (scheme == chrome::kHttpScheme) 25 return "80"; 26 if (scheme == chrome::kHttpsScheme) 27 return "443"; 28 return ""; 29} 30 31// Returns true if |sub_domain| is a sub domain or equls |domain|. E.g. 32// "mail.google.com" is a sub domain of "google.com" but "evilhost.com" is not a 33// subdomain of "host.com". 34bool IsSubDomainOrEqual(const std::string& sub_domain, 35 const std::string& domain) { 36 // The empty string serves as wildcard. Each domain is a subdomain of the 37 // wildcard. 38 if (domain.empty()) 39 return true; 40 const size_t match = sub_domain.rfind(domain); 41 if (match == std::string::npos || 42 (match > 0 && sub_domain[match - 1] != '.') || 43 (match + domain.length() != sub_domain.length())) { 44 return false; 45 } 46 return true; 47} 48 49// Compares two domain names. 50int CompareDomainNames(const std::string& str1, const std::string& str2) { 51 std::vector<std::string> domain_name1; 52 std::vector<std::string> domain_name2; 53 54 base::SplitString(str1, '.', &domain_name1); 55 base::SplitString(str2, '.', &domain_name2); 56 57 int i1 = domain_name1.size() - 1; 58 int i2 = domain_name2.size() - 1; 59 int rv; 60 while (i1 >= 0 && i2 >= 0) { 61 // domain names are stored in puny code. So it's fine to use the compare 62 // method. 63 rv = domain_name1[i1].compare(domain_name2[i2]); 64 if (rv != 0) 65 return rv; 66 --i1; 67 --i2; 68 } 69 70 if (i1 > i2) 71 return 1; 72 73 if (i1 < i2) 74 return -1; 75 76 // The domain names are identical. 77 return 0; 78} 79 80typedef ContentSettingsPattern::BuilderInterface BuilderInterface; 81 82} // namespace 83 84// //////////////////////////////////////////////////////////////////////////// 85// ContentSettingsPattern::Builder 86// 87ContentSettingsPattern::Builder::Builder(bool use_legacy_validate) 88 : is_valid_(true), 89 use_legacy_validate_(use_legacy_validate) {} 90 91ContentSettingsPattern::Builder::~Builder() {} 92 93BuilderInterface* ContentSettingsPattern::Builder::WithPort( 94 const std::string& port) { 95 parts_.port = port; 96 parts_.is_port_wildcard = false; 97 return this; 98} 99 100BuilderInterface* ContentSettingsPattern::Builder::WithPortWildcard() { 101 parts_.port = ""; 102 parts_.is_port_wildcard = true; 103 return this; 104} 105 106BuilderInterface* ContentSettingsPattern::Builder::WithHost( 107 const std::string& host) { 108 parts_.host = host; 109 return this; 110} 111 112BuilderInterface* ContentSettingsPattern::Builder::WithDomainWildcard() { 113 parts_.has_domain_wildcard = true; 114 return this; 115} 116 117BuilderInterface* ContentSettingsPattern::Builder::WithScheme( 118 const std::string& scheme) { 119 parts_.scheme = scheme; 120 parts_.is_scheme_wildcard = false; 121 return this; 122} 123 124BuilderInterface* ContentSettingsPattern::Builder::WithSchemeWildcard() { 125 parts_.scheme = ""; 126 parts_.is_scheme_wildcard = true; 127 return this; 128} 129 130BuilderInterface* ContentSettingsPattern::Builder::WithPath( 131 const std::string& path) { 132 parts_.path = path; 133 parts_.is_path_wildcard = false; 134 return this; 135} 136 137BuilderInterface* ContentSettingsPattern::Builder::WithPathWildcard() { 138 parts_.path = ""; 139 parts_.is_path_wildcard = true; 140 return this; 141} 142 143BuilderInterface* ContentSettingsPattern::Builder::Invalid() { 144 is_valid_ = false; 145 return this; 146} 147 148ContentSettingsPattern ContentSettingsPattern::Builder::Build() { 149 if (!is_valid_) 150 return ContentSettingsPattern(); 151 if (!Canonicalize(&parts_)) 152 return ContentSettingsPattern(); 153 if (use_legacy_validate_) { 154 is_valid_ = LegacyValidate(parts_); 155 } else { 156 is_valid_ = Validate(parts_); 157 } 158 return ContentSettingsPattern(parts_, is_valid_); 159} 160 161// static 162bool ContentSettingsPattern::Builder::Canonicalize(PatternParts* parts) { 163 // Canonicalize the scheme part. 164 const std::string scheme(StringToLowerASCII(parts->scheme)); 165 parts->scheme = scheme; 166 167 if (parts->scheme == std::string(chrome::kFileScheme) && 168 !parts->is_path_wildcard) { 169 GURL url(std::string(chrome::kFileScheme) + 170 std::string(content::kStandardSchemeSeparator) + parts->path); 171 parts->path = url.path(); 172 } 173 174 // Canonicalize the host part. 175 const std::string host(parts->host); 176 url_canon::CanonHostInfo host_info; 177 std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); 178 if (host_info.IsIPAddress() && parts->has_domain_wildcard) 179 return false; 180 canonicalized_host = net::TrimEndingDot(canonicalized_host); 181 182 parts->host = ""; 183 if ((host.find('*') == std::string::npos) && 184 !canonicalized_host.empty()) { 185 // Valid host. 186 parts->host += canonicalized_host; 187 } 188 return true; 189} 190 191// static 192bool ContentSettingsPattern::Builder::Validate(const PatternParts& parts) { 193 // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}. 194 if ((parts.is_scheme_wildcard && !parts.scheme.empty()) || 195 (parts.is_port_wildcard && !parts.port.empty())) { 196 NOTREACHED(); 197 return false; 198 } 199 200 // file:// URL patterns have an empty host and port. 201 if (parts.scheme == std::string(chrome::kFileScheme)) { 202 if (parts.has_domain_wildcard || !parts.host.empty() || !parts.port.empty()) 203 return false; 204 if (parts.is_path_wildcard) 205 return parts.path.empty(); 206 return (!parts.path.empty() && 207 parts.path != "/" && 208 parts.path.find("*") == std::string::npos); 209 } 210 211 // If the pattern is for an extension URL test if it is valid. 212 if (parts.scheme == std::string(chrome::kExtensionScheme) && 213 parts.port.empty() && 214 !parts.is_port_wildcard) { 215 return true; 216 } 217 218 // Non-file patterns are invalid if either the scheme, host or port part is 219 // empty. 220 if ((parts.scheme.empty() && !parts.is_scheme_wildcard) || 221 (parts.host.empty() && !parts.has_domain_wildcard) || 222 (parts.port.empty() && !parts.is_port_wildcard)) { 223 return false; 224 } 225 226 if (parts.host.find("*") != std::string::npos) 227 return false; 228 229 // Test if the scheme is supported or a wildcard. 230 if (!parts.is_scheme_wildcard && 231 parts.scheme != std::string(chrome::kHttpScheme) && 232 parts.scheme != std::string(chrome::kHttpsScheme)) { 233 return false; 234 } 235 return true; 236} 237 238// static 239bool ContentSettingsPattern::Builder::LegacyValidate( 240 const PatternParts& parts) { 241 // If the pattern is for a "file-pattern" test if it is valid. 242 if (parts.scheme == std::string(chrome::kFileScheme) && 243 !parts.is_scheme_wildcard && 244 parts.host.empty() && 245 parts.port.empty()) 246 return true; 247 248 // If the pattern is for an extension URL test if it is valid. 249 if (parts.scheme == std::string(chrome::kExtensionScheme) && 250 !parts.is_scheme_wildcard && 251 !parts.host.empty() && 252 !parts.has_domain_wildcard && 253 parts.port.empty() && 254 !parts.is_port_wildcard) 255 return true; 256 257 // Non-file patterns are invalid if either the scheme, host or port part is 258 // empty. 259 if ((!parts.is_scheme_wildcard) || 260 (parts.host.empty() && !parts.has_domain_wildcard) || 261 (!parts.is_port_wildcard)) 262 return false; 263 264 // Test if the scheme is supported or a wildcard. 265 if (!parts.is_scheme_wildcard && 266 parts.scheme != std::string(chrome::kHttpScheme) && 267 parts.scheme != std::string(chrome::kHttpsScheme)) { 268 return false; 269 } 270 return true; 271} 272 273// //////////////////////////////////////////////////////////////////////////// 274// ContentSettingsPattern::PatternParts 275// 276ContentSettingsPattern::PatternParts::PatternParts() 277 : is_scheme_wildcard(false), 278 has_domain_wildcard(false), 279 is_port_wildcard(false), 280 is_path_wildcard(false) {} 281 282ContentSettingsPattern::PatternParts::~PatternParts() {} 283 284// //////////////////////////////////////////////////////////////////////////// 285// ContentSettingsPattern 286// 287 288// The version of the pattern format implemented. Version 1 includes the 289// following patterns: 290// - [*.]domain.tld (matches domain.tld and all sub-domains) 291// - host (matches an exact hostname) 292// - a.b.c.d (matches an exact IPv4 ip) 293// - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip) 294// - file:///tmp/test.html (a complete URL without a host) 295// Version 2 adds a resource identifier for plugins. 296// TODO(jochen): update once this feature is no longer behind a flag. 297const int ContentSettingsPattern::kContentSettingsPatternVersion = 1; 298 299// TODO(markusheintz): These two constants were moved to the Pattern Parser. 300// Remove once the dependency of the ContentSettingsBaseProvider is removed. 301const char* ContentSettingsPattern::kDomainWildcard = "[*.]"; 302const size_t ContentSettingsPattern::kDomainWildcardLength = 4; 303 304// static 305BuilderInterface* ContentSettingsPattern::CreateBuilder( 306 bool validate) { 307 return new Builder(validate); 308} 309 310// static 311ContentSettingsPattern ContentSettingsPattern::FromURL( 312 const GURL& url) { 313 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 314 ContentSettingsPattern::CreateBuilder(false)); 315 316 const GURL* local_url = &url; 317 if (url.SchemeIsFileSystem() && url.inner_url()) { 318 local_url = url.inner_url(); 319 } 320 if (local_url->SchemeIsFile()) { 321 builder->WithScheme(local_url->scheme())->WithPath(local_url->path()); 322 } else { 323 // Please keep the order of the ifs below as URLs with an IP as host can 324 // also have a "http" scheme. 325 if (local_url->HostIsIPAddress()) { 326 builder->WithScheme(local_url->scheme())->WithHost(local_url->host()); 327 } else if (local_url->SchemeIs(chrome::kHttpScheme)) { 328 builder->WithSchemeWildcard()->WithDomainWildcard()->WithHost( 329 local_url->host()); 330 } else if (local_url->SchemeIs(chrome::kHttpsScheme)) { 331 builder->WithScheme(local_url->scheme())->WithDomainWildcard()->WithHost( 332 local_url->host()); 333 } else { 334 // Unsupported scheme 335 } 336 if (local_url->port().empty()) { 337 if (local_url->SchemeIs(chrome::kHttpsScheme)) 338 builder->WithPort(GetDefaultPort(chrome::kHttpsScheme)); 339 else 340 builder->WithPortWildcard(); 341 } else { 342 builder->WithPort(local_url->port()); 343 } 344 } 345 return builder->Build(); 346} 347 348// static 349ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard( 350 const GURL& url) { 351 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 352 ContentSettingsPattern::CreateBuilder(false)); 353 354 const GURL* local_url = &url; 355 if (url.SchemeIsFileSystem() && url.inner_url()) { 356 local_url = url.inner_url(); 357 } 358 if (local_url->SchemeIsFile()) { 359 builder->WithScheme(local_url->scheme())->WithPath(local_url->path()); 360 } else { 361 builder->WithScheme(local_url->scheme())->WithHost(local_url->host()); 362 if (local_url->port().empty()) { 363 builder->WithPort(GetDefaultPort(local_url->scheme())); 364 } else { 365 builder->WithPort(local_url->port()); 366 } 367 } 368 return builder->Build(); 369} 370 371// static 372ContentSettingsPattern ContentSettingsPattern::FromString( 373 const std::string& pattern_spec) { 374 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 375 ContentSettingsPattern::CreateBuilder(false)); 376 content_settings::PatternParser::Parse(pattern_spec, builder.get()); 377 return builder->Build(); 378} 379 380// static 381ContentSettingsPattern ContentSettingsPattern::LegacyFromString( 382 const std::string& pattern_spec) { 383 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 384 ContentSettingsPattern::CreateBuilder(true)); 385 content_settings::PatternParser::Parse(pattern_spec, builder.get()); 386 return builder->Build(); 387} 388 389// static 390ContentSettingsPattern ContentSettingsPattern::Wildcard() { 391 scoped_ptr<ContentSettingsPattern::BuilderInterface> builder( 392 ContentSettingsPattern::CreateBuilder(true)); 393 builder->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()-> 394 WithPathWildcard(); 395 return builder->Build(); 396} 397 398ContentSettingsPattern::ContentSettingsPattern() 399 : is_valid_(false) { 400} 401 402ContentSettingsPattern::ContentSettingsPattern( 403 const PatternParts& parts, 404 bool valid) 405 : parts_(parts), 406 is_valid_(valid) { 407} 408 409void ContentSettingsPattern::WriteToMessage(IPC::Message* m) const { 410 IPC::WriteParam(m, is_valid_); 411 IPC::WriteParam(m, parts_); 412} 413 414bool ContentSettingsPattern::ReadFromMessage(const IPC::Message* m, 415 PickleIterator* iter) { 416 return IPC::ReadParam(m, iter, &is_valid_) && 417 IPC::ReadParam(m, iter, &parts_); 418} 419 420bool ContentSettingsPattern::Matches( 421 const GURL& url) const { 422 // An invalid pattern matches nothing. 423 if (!is_valid_) 424 return false; 425 426 const GURL* local_url = &url; 427 if (url.SchemeIsFileSystem() && url.inner_url()) { 428 local_url = url.inner_url(); 429 } 430 431 // Match the scheme part. 432 const std::string scheme(local_url->scheme()); 433 if (!parts_.is_scheme_wildcard && 434 parts_.scheme != scheme) { 435 return false; 436 } 437 438 // File URLs have no host. Matches if the pattern has the path wildcard set, 439 // or if the path in the URL is identical to the one in the pattern. 440 // For filesystem:file URLs, the path used is the filesystem type, so all 441 // filesystem:file:///temporary/... are equivalent. 442 // TODO(markusheintz): Content settings should be defined for all files on 443 // a machine. Unless there is a good use case for supporting paths for file 444 // patterns, stop supporting path for file patterns. 445 if (!parts_.is_scheme_wildcard && scheme == chrome::kFileScheme) 446 return parts_.is_path_wildcard || 447 parts_.path == std::string(local_url->path()); 448 449 // Match the host part. 450 const std::string host(net::TrimEndingDot(local_url->host())); 451 if (!parts_.has_domain_wildcard) { 452 if (parts_.host != host) 453 return false; 454 } else { 455 if (!IsSubDomainOrEqual(host, parts_.host)) 456 return false; 457 } 458 459 // For chrome extensions URLs ignore the port. 460 if (parts_.scheme == std::string(chrome::kExtensionScheme)) 461 return true; 462 463 // Match the port part. 464 std::string port(local_url->port()); 465 466 // Use the default port if the port string is empty. GURL returns an empty 467 // string if no port at all was specified or if the default port was 468 // specified. 469 if (port.empty()) { 470 port = GetDefaultPort(scheme); 471 } 472 473 if (!parts_.is_port_wildcard && 474 parts_.port != port ) { 475 return false; 476 } 477 478 return true; 479} 480 481bool ContentSettingsPattern::MatchesAllHosts() const { 482 return parts_.has_domain_wildcard && parts_.host.empty(); 483} 484 485const std::string ContentSettingsPattern::ToString() const { 486 if (IsValid()) 487 return content_settings::PatternParser::ToString(parts_); 488 else 489 return ""; 490} 491 492ContentSettingsPattern::Relation ContentSettingsPattern::Compare( 493 const ContentSettingsPattern& other) const { 494 // Two invalid patterns are identical in the way they behave. They don't match 495 // anything and are represented as an empty string. So it's fair to treat them 496 // as identical. 497 if ((this == &other) || 498 (!is_valid_ && !other.is_valid_)) 499 return IDENTITY; 500 501 if (!is_valid_ && other.is_valid_) 502 return DISJOINT_ORDER_POST; 503 if (is_valid_ && !other.is_valid_) 504 return DISJOINT_ORDER_PRE; 505 506 // If either host, port or scheme are disjoint return immediately. 507 Relation host_relation = CompareHost(parts_, other.parts_); 508 if (host_relation == DISJOINT_ORDER_PRE || 509 host_relation == DISJOINT_ORDER_POST) 510 return host_relation; 511 512 Relation port_relation = ComparePort(parts_, other.parts_); 513 if (port_relation == DISJOINT_ORDER_PRE || 514 port_relation == DISJOINT_ORDER_POST) 515 return port_relation; 516 517 Relation scheme_relation = CompareScheme(parts_, other.parts_); 518 if (scheme_relation == DISJOINT_ORDER_PRE || 519 scheme_relation == DISJOINT_ORDER_POST) 520 return scheme_relation; 521 522 if (host_relation != IDENTITY) 523 return host_relation; 524 if (port_relation != IDENTITY) 525 return port_relation; 526 return scheme_relation; 527} 528 529bool ContentSettingsPattern::operator==( 530 const ContentSettingsPattern& other) const { 531 return Compare(other) == IDENTITY; 532} 533 534bool ContentSettingsPattern::operator!=( 535 const ContentSettingsPattern& other) const { 536 return !(*this == other); 537} 538 539bool ContentSettingsPattern::operator<( 540 const ContentSettingsPattern& other) const { 541 return Compare(other) < 0; 542} 543 544bool ContentSettingsPattern::operator>( 545 const ContentSettingsPattern& other) const { 546 return Compare(other) > 0; 547} 548 549// static 550ContentSettingsPattern::Relation ContentSettingsPattern::CompareHost( 551 const ContentSettingsPattern::PatternParts& parts, 552 const ContentSettingsPattern::PatternParts& other_parts) { 553 if (!parts.has_domain_wildcard && !other_parts.has_domain_wildcard) { 554 // Case 1: No host starts with a wild card 555 int result = CompareDomainNames(parts.host, other_parts.host); 556 if (result == 0) 557 return ContentSettingsPattern::IDENTITY; 558 if (result < 0) 559 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 560 return ContentSettingsPattern::DISJOINT_ORDER_POST; 561 } else if (parts.has_domain_wildcard && !other_parts.has_domain_wildcard) { 562 // Case 2: |host| starts with a domain wildcard and |other_host| does not 563 // start with a domain wildcard. 564 // Examples: 565 // "this" host: [*.]google.com 566 // "other" host: google.com 567 // 568 // [*.]google.com 569 // mail.google.com 570 // 571 // [*.]mail.google.com 572 // google.com 573 // 574 // [*.]youtube.com 575 // google.de 576 // 577 // [*.]youtube.com 578 // mail.google.com 579 // 580 // * 581 // google.de 582 if (IsSubDomainOrEqual(other_parts.host, parts.host)) { 583 return ContentSettingsPattern::SUCCESSOR; 584 } else { 585 if (CompareDomainNames(parts.host, other_parts.host) < 0) 586 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 587 return ContentSettingsPattern::DISJOINT_ORDER_POST; 588 } 589 } else if (!parts.has_domain_wildcard && other_parts.has_domain_wildcard) { 590 // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts 591 // with a domain wildcard. 592 if (IsSubDomainOrEqual(parts.host, other_parts.host)) { 593 return ContentSettingsPattern::PREDECESSOR; 594 } else { 595 if (CompareDomainNames(parts.host, other_parts.host) < 0) 596 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 597 return ContentSettingsPattern::DISJOINT_ORDER_POST; 598 } 599 } else if (parts.has_domain_wildcard && other_parts.has_domain_wildcard) { 600 // Case 4: |host| and |other_host| both start with a domain wildcard. 601 // Examples: 602 // [*.]google.com 603 // [*.]google.com 604 // 605 // [*.]google.com 606 // [*.]mail.google.com 607 // 608 // [*.]youtube.com 609 // [*.]google.de 610 // 611 // [*.]youtube.com 612 // [*.]mail.google.com 613 // 614 // [*.]youtube.com 615 // * 616 // 617 // * 618 // [*.]youtube.com 619 if (parts.host == other_parts.host) { 620 return ContentSettingsPattern::IDENTITY; 621 } else if (IsSubDomainOrEqual(other_parts.host, parts.host)) { 622 return ContentSettingsPattern::SUCCESSOR; 623 } else if (IsSubDomainOrEqual(parts.host, other_parts.host)) { 624 return ContentSettingsPattern::PREDECESSOR; 625 } else { 626 if (CompareDomainNames(parts.host, other_parts.host) < 0) 627 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 628 return ContentSettingsPattern::DISJOINT_ORDER_POST; 629 } 630 } 631 632 NOTREACHED(); 633 return ContentSettingsPattern::IDENTITY; 634} 635 636// static 637ContentSettingsPattern::Relation ContentSettingsPattern::CompareScheme( 638 const ContentSettingsPattern::PatternParts& parts, 639 const ContentSettingsPattern::PatternParts& other_parts) { 640 if (parts.is_scheme_wildcard && !other_parts.is_scheme_wildcard) 641 return ContentSettingsPattern::SUCCESSOR; 642 if (!parts.is_scheme_wildcard && other_parts.is_scheme_wildcard) 643 return ContentSettingsPattern::PREDECESSOR; 644 645 int result = parts.scheme.compare(other_parts.scheme); 646 if (result == 0) 647 return ContentSettingsPattern::IDENTITY; 648 if (result > 0) 649 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 650 return ContentSettingsPattern::DISJOINT_ORDER_POST; 651} 652 653// static 654ContentSettingsPattern::Relation ContentSettingsPattern::ComparePort( 655 const ContentSettingsPattern::PatternParts& parts, 656 const ContentSettingsPattern::PatternParts& other_parts) { 657 if (parts.is_port_wildcard && !other_parts.is_port_wildcard) 658 return ContentSettingsPattern::SUCCESSOR; 659 if (!parts.is_port_wildcard && other_parts.is_port_wildcard) 660 return ContentSettingsPattern::PREDECESSOR; 661 662 int result = parts.port.compare(other_parts.port); 663 if (result == 0) 664 return ContentSettingsPattern::IDENTITY; 665 if (result > 0) 666 return ContentSettingsPattern::DISJOINT_ORDER_PRE; 667 return ContentSettingsPattern::DISJOINT_ORDER_POST; 668} 669