content_settings_pattern.cc revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/common/content_settings_pattern.h"
6
7#include <vector>
8
9#include "base/memory/scoped_ptr.h"
10#include "base/string_util.h"
11#include "base/strings/string_split.h"
12#include "chrome/common/content_settings_pattern_parser.h"
13#include "chrome/common/render_messages.h"
14#include "chrome/common/url_constants.h"
15#include "extensions/common/constants.h"
16#include "googleurl/src/gurl.h"
17#include "googleurl/src/url_canon.h"
18#include "ipc/ipc_message_utils.h"
19#include "net/base/dns_util.h"
20#include "net/base/net_util.h"
21
22namespace {
23
24std::string GetDefaultPort(const std::string& scheme) {
25  if (scheme == chrome::kHttpScheme)
26    return "80";
27  if (scheme == chrome::kHttpsScheme)
28    return "443";
29  return "";
30}
31
// Returns true if |sub_domain| equals |domain| or is a sub domain of it, i.e.
// |sub_domain| ends with |domain| and that suffix either spans the whole
// string or is preceded by a '.'. For example "mail.google.com" is a sub
// domain of "google.com", whereas "evilhost.com" is not a sub domain of
// "host.com". An empty |domain| acts as a wildcard and matches everything.
bool IsSubDomainOrEqual(const std::string& sub_domain,
                        const std::string& domain) {
  // The wildcard (empty) domain contains every domain.
  if (domain.empty())
    return true;

  // A candidate shorter than |domain| can never end with it.
  if (sub_domain.length() < domain.length())
    return false;

  const size_t suffix_start = sub_domain.length() - domain.length();
  if (sub_domain.compare(suffix_start, domain.length(), domain) != 0)
    return false;

  // The suffix must begin on a label boundary.
  return suffix_start == 0 || sub_domain[suffix_start - 1] == '.';
}
49
50// Compares two domain names.
51int CompareDomainNames(const std::string& str1, const std::string& str2) {
52  std::vector<std::string> domain_name1;
53  std::vector<std::string> domain_name2;
54
55  base::SplitString(str1, '.', &domain_name1);
56  base::SplitString(str2, '.', &domain_name2);
57
58  int i1 = domain_name1.size() - 1;
59  int i2 = domain_name2.size() - 1;
60  int rv;
61  while (i1 >= 0 && i2 >= 0) {
62    // domain names are stored in puny code. So it's fine to use the compare
63    // method.
64    rv = domain_name1[i1].compare(domain_name2[i2]);
65    if (rv != 0)
66      return rv;
67    --i1;
68    --i2;
69  }
70
71  if (i1 > i2)
72    return 1;
73
74  if (i1 < i2)
75    return -1;
76
77  // The domain names are identical.
78  return 0;
79}
80
// File-local shorthand so that the Builder method definitions below can spell
// their return type without the ContentSettingsPattern:: qualifier.
typedef ContentSettingsPattern::BuilderInterface BuilderInterface;
82
83}  // namespace
84
85// ////////////////////////////////////////////////////////////////////////////
86// ContentSettingsPattern::Builder
87//
88ContentSettingsPattern::Builder::Builder(bool use_legacy_validate)
89    : is_valid_(true),
90      use_legacy_validate_(use_legacy_validate) {}
91
92ContentSettingsPattern::Builder::~Builder() {}
93
94BuilderInterface* ContentSettingsPattern::Builder::WithPort(
95    const std::string& port) {
96  parts_.port = port;
97  parts_.is_port_wildcard = false;
98  return this;
99}
100
101BuilderInterface* ContentSettingsPattern::Builder::WithPortWildcard() {
102  parts_.port = "";
103  parts_.is_port_wildcard = true;
104  return this;
105}
106
107BuilderInterface* ContentSettingsPattern::Builder::WithHost(
108    const std::string& host) {
109  parts_.host = host;
110  return this;
111}
112
113BuilderInterface* ContentSettingsPattern::Builder::WithDomainWildcard() {
114  parts_.has_domain_wildcard = true;
115  return this;
116}
117
118BuilderInterface* ContentSettingsPattern::Builder::WithScheme(
119    const std::string& scheme) {
120  parts_.scheme = scheme;
121  parts_.is_scheme_wildcard = false;
122  return this;
123}
124
125BuilderInterface* ContentSettingsPattern::Builder::WithSchemeWildcard() {
126  parts_.scheme = "";
127  parts_.is_scheme_wildcard = true;
128  return this;
129}
130
131BuilderInterface* ContentSettingsPattern::Builder::WithPath(
132    const std::string& path) {
133  parts_.path = path;
134  parts_.is_path_wildcard = false;
135  return this;
136}
137
138BuilderInterface* ContentSettingsPattern::Builder::WithPathWildcard() {
139  parts_.path = "";
140  parts_.is_path_wildcard = true;
141  return this;
142}
143
144BuilderInterface* ContentSettingsPattern::Builder::Invalid() {
145  is_valid_ = false;
146  return this;
147}
148
149ContentSettingsPattern ContentSettingsPattern::Builder::Build() {
150  if (!is_valid_)
151    return ContentSettingsPattern();
152  if (!Canonicalize(&parts_))
153    return ContentSettingsPattern();
154  if (use_legacy_validate_) {
155    is_valid_ = LegacyValidate(parts_);
156  } else {
157    is_valid_ = Validate(parts_);
158  }
159  return ContentSettingsPattern(parts_, is_valid_);
160}
161
162// static
163bool ContentSettingsPattern::Builder::Canonicalize(PatternParts* parts) {
164  // Canonicalize the scheme part.
165  const std::string scheme(StringToLowerASCII(parts->scheme));
166  parts->scheme = scheme;
167
168  if (parts->scheme == std::string(chrome::kFileScheme) &&
169      !parts->is_path_wildcard) {
170      GURL url(std::string(chrome::kFileScheme) +
171               std::string(content::kStandardSchemeSeparator) + parts->path);
172      parts->path = url.path();
173  }
174
175  // Canonicalize the host part.
176  const std::string host(parts->host);
177  url_canon::CanonHostInfo host_info;
178  std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
179  if (host_info.IsIPAddress() && parts->has_domain_wildcard)
180    return false;
181  canonicalized_host = net::TrimEndingDot(canonicalized_host);
182
183  parts->host = "";
184  if ((host.find('*') == std::string::npos) &&
185      !canonicalized_host.empty()) {
186    // Valid host.
187    parts->host += canonicalized_host;
188  }
189  return true;
190}
191
192// static
193bool ContentSettingsPattern::Builder::Validate(const PatternParts& parts) {
194  // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}.
195  if ((parts.is_scheme_wildcard && !parts.scheme.empty()) ||
196      (parts.is_port_wildcard && !parts.port.empty())) {
197    NOTREACHED();
198    return false;
199  }
200
201  // file:// URL patterns have an empty host and port.
202  if (parts.scheme == std::string(chrome::kFileScheme)) {
203    if (parts.has_domain_wildcard || !parts.host.empty() || !parts.port.empty())
204      return false;
205    if (parts.is_path_wildcard)
206      return parts.path.empty();
207    return (!parts.path.empty() &&
208            parts.path != "/" &&
209            parts.path.find("*") == std::string::npos);
210  }
211
212  // If the pattern is for an extension URL test if it is valid.
213  if (parts.scheme == std::string(extensions::kExtensionScheme) &&
214      parts.port.empty() &&
215      !parts.is_port_wildcard) {
216    return true;
217  }
218
219  // Non-file patterns are invalid if either the scheme, host or port part is
220  // empty.
221  if ((parts.scheme.empty() && !parts.is_scheme_wildcard) ||
222      (parts.host.empty() && !parts.has_domain_wildcard) ||
223      (parts.port.empty() && !parts.is_port_wildcard)) {
224    return false;
225  }
226
227  if (parts.host.find("*") != std::string::npos)
228    return false;
229
230  // Test if the scheme is supported or a wildcard.
231  if (!parts.is_scheme_wildcard &&
232      parts.scheme != std::string(chrome::kHttpScheme) &&
233      parts.scheme != std::string(chrome::kHttpsScheme)) {
234    return false;
235  }
236  return true;
237}
238
239// static
240bool ContentSettingsPattern::Builder::LegacyValidate(
241    const PatternParts& parts) {
242  // If the pattern is for a "file-pattern" test if it is valid.
243  if (parts.scheme == std::string(chrome::kFileScheme) &&
244      !parts.is_scheme_wildcard &&
245      parts.host.empty() &&
246      parts.port.empty())
247    return true;
248
249  // If the pattern is for an extension URL test if it is valid.
250  if (parts.scheme == std::string(extensions::kExtensionScheme) &&
251      !parts.is_scheme_wildcard &&
252      !parts.host.empty() &&
253      !parts.has_domain_wildcard &&
254      parts.port.empty() &&
255      !parts.is_port_wildcard)
256    return true;
257
258  // Non-file patterns are invalid if either the scheme, host or port part is
259  // empty.
260  if ((!parts.is_scheme_wildcard) ||
261      (parts.host.empty() && !parts.has_domain_wildcard) ||
262      (!parts.is_port_wildcard))
263    return false;
264
265  // Test if the scheme is supported or a wildcard.
266  if (!parts.is_scheme_wildcard &&
267      parts.scheme != std::string(chrome::kHttpScheme) &&
268      parts.scheme != std::string(chrome::kHttpsScheme)) {
269    return false;
270  }
271  return true;
272}
273
274// ////////////////////////////////////////////////////////////////////////////
275// ContentSettingsPattern::PatternParts
276//
277ContentSettingsPattern::PatternParts::PatternParts()
278        : is_scheme_wildcard(false),
279          has_domain_wildcard(false),
280          is_port_wildcard(false),
281          is_path_wildcard(false) {}
282
283ContentSettingsPattern::PatternParts::~PatternParts() {}
284
285// ////////////////////////////////////////////////////////////////////////////
286// ContentSettingsPattern
287//
288
// The version of the pattern format implemented. Version 1 includes the
// following patterns:
//   - [*.]domain.tld (matches domain.tld and all sub-domains)
//   - host (matches an exact hostname)
//   - a.b.c.d (matches an exact IPv4 ip)
//   - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
//   - file:///tmp/test.html (a complete URL without a host)
// Version 2 adds a resource identifier for plugins.
// TODO(jochen): update once this feature is no longer behind a flag.
const int ContentSettingsPattern::kContentSettingsPatternVersion = 1;

// TODO(markusheintz): These two constants were moved to the Pattern Parser.
// Remove once the dependency of the ContentSettingsBaseProvider is removed.
// kDomainWildcardLength is hard-coded and has to stay equal to the number of
// characters in the kDomainWildcard literal.
const char* ContentSettingsPattern::kDomainWildcard = "[*.]";
const size_t ContentSettingsPattern::kDomainWildcardLength = 4;
304
305// static
306BuilderInterface* ContentSettingsPattern::CreateBuilder(
307    bool validate) {
308  return new Builder(validate);
309}
310
311// static
312ContentSettingsPattern ContentSettingsPattern::FromURL(
313    const GURL& url) {
314  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
315      ContentSettingsPattern::CreateBuilder(false));
316
317  const GURL* local_url = &url;
318  if (url.SchemeIsFileSystem() && url.inner_url()) {
319    local_url = url.inner_url();
320  }
321  if (local_url->SchemeIsFile()) {
322    builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
323  } else {
324    // Please keep the order of the ifs below as URLs with an IP as host can
325    // also have a "http" scheme.
326    if (local_url->HostIsIPAddress()) {
327      builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
328    } else if (local_url->SchemeIs(chrome::kHttpScheme)) {
329      builder->WithSchemeWildcard()->WithDomainWildcard()->WithHost(
330          local_url->host());
331    } else if (local_url->SchemeIs(chrome::kHttpsScheme)) {
332      builder->WithScheme(local_url->scheme())->WithDomainWildcard()->WithHost(
333          local_url->host());
334    } else {
335      // Unsupported scheme
336    }
337    if (local_url->port().empty()) {
338      if (local_url->SchemeIs(chrome::kHttpsScheme))
339        builder->WithPort(GetDefaultPort(chrome::kHttpsScheme));
340      else
341        builder->WithPortWildcard();
342    } else {
343      builder->WithPort(local_url->port());
344    }
345  }
346  return builder->Build();
347}
348
349// static
350ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard(
351    const GURL& url) {
352  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
353      ContentSettingsPattern::CreateBuilder(false));
354
355  const GURL* local_url = &url;
356  if (url.SchemeIsFileSystem() && url.inner_url()) {
357    local_url = url.inner_url();
358  }
359  if (local_url->SchemeIsFile()) {
360    builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
361  } else {
362    builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
363    if (local_url->port().empty()) {
364      builder->WithPort(GetDefaultPort(local_url->scheme()));
365    } else {
366      builder->WithPort(local_url->port());
367    }
368  }
369  return builder->Build();
370}
371
372// static
373ContentSettingsPattern ContentSettingsPattern::FromString(
374    const std::string& pattern_spec) {
375  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
376      ContentSettingsPattern::CreateBuilder(false));
377  content_settings::PatternParser::Parse(pattern_spec, builder.get());
378  return builder->Build();
379}
380
381// static
382ContentSettingsPattern ContentSettingsPattern::LegacyFromString(
383    const std::string& pattern_spec) {
384  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
385      ContentSettingsPattern::CreateBuilder(true));
386  content_settings::PatternParser::Parse(pattern_spec, builder.get());
387  return builder->Build();
388}
389
390// static
391ContentSettingsPattern ContentSettingsPattern::Wildcard() {
392  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
393      ContentSettingsPattern::CreateBuilder(true));
394  builder->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()->
395           WithPathWildcard();
396  return builder->Build();
397}
398
399ContentSettingsPattern::ContentSettingsPattern()
400  : is_valid_(false) {
401}
402
403ContentSettingsPattern::ContentSettingsPattern(
404    const PatternParts& parts,
405    bool valid)
406    : parts_(parts),
407      is_valid_(valid) {
408}
409
410void ContentSettingsPattern::WriteToMessage(IPC::Message* m) const {
411  IPC::WriteParam(m, is_valid_);
412  IPC::WriteParam(m, parts_);
413}
414
415bool ContentSettingsPattern::ReadFromMessage(const IPC::Message* m,
416                                             PickleIterator* iter) {
417  return IPC::ReadParam(m, iter, &is_valid_) &&
418         IPC::ReadParam(m, iter, &parts_);
419}
420
421bool ContentSettingsPattern::Matches(
422    const GURL& url) const {
423  // An invalid pattern matches nothing.
424  if (!is_valid_)
425    return false;
426
427  const GURL* local_url = &url;
428  if (url.SchemeIsFileSystem() && url.inner_url()) {
429    local_url = url.inner_url();
430  }
431
432  // Match the scheme part.
433  const std::string scheme(local_url->scheme());
434  if (!parts_.is_scheme_wildcard &&
435      parts_.scheme != scheme) {
436    return false;
437  }
438
439  // File URLs have no host. Matches if the pattern has the path wildcard set,
440  // or if the path in the URL is identical to the one in the pattern.
441  // For filesystem:file URLs, the path used is the filesystem type, so all
442  // filesystem:file:///temporary/... are equivalent.
443  // TODO(markusheintz): Content settings should be defined for all files on
444  // a machine. Unless there is a good use case for supporting paths for file
445  // patterns, stop supporting path for file patterns.
446  if (!parts_.is_scheme_wildcard && scheme == chrome::kFileScheme)
447    return parts_.is_path_wildcard ||
448        parts_.path == std::string(local_url->path());
449
450  // Match the host part.
451  const std::string host(net::TrimEndingDot(local_url->host()));
452  if (!parts_.has_domain_wildcard) {
453    if (parts_.host != host)
454      return false;
455  } else {
456    if (!IsSubDomainOrEqual(host, parts_.host))
457      return false;
458  }
459
460  // For chrome extensions URLs ignore the port.
461  if (parts_.scheme == std::string(extensions::kExtensionScheme))
462    return true;
463
464  // Match the port part.
465  std::string port(local_url->port());
466
467  // Use the default port if the port string is empty. GURL returns an empty
468  // string if no port at all was specified or if the default port was
469  // specified.
470  if (port.empty()) {
471    port = GetDefaultPort(scheme);
472  }
473
474  if (!parts_.is_port_wildcard &&
475      parts_.port != port ) {
476    return false;
477  }
478
479  return true;
480}
481
482bool ContentSettingsPattern::MatchesAllHosts() const {
483  return parts_.has_domain_wildcard && parts_.host.empty();
484}
485
486const std::string ContentSettingsPattern::ToString() const {
487  if (IsValid())
488    return content_settings::PatternParser::ToString(parts_);
489  else
490    return "";
491}
492
493ContentSettingsPattern::Relation ContentSettingsPattern::Compare(
494    const ContentSettingsPattern& other) const {
495  // Two invalid patterns are identical in the way they behave. They don't match
496  // anything and are represented as an empty string. So it's fair to treat them
497  // as identical.
498  if ((this == &other) ||
499      (!is_valid_ && !other.is_valid_))
500    return IDENTITY;
501
502  if (!is_valid_ && other.is_valid_)
503    return DISJOINT_ORDER_POST;
504  if (is_valid_ && !other.is_valid_)
505    return DISJOINT_ORDER_PRE;
506
507  // If either host, port or scheme are disjoint return immediately.
508  Relation host_relation = CompareHost(parts_, other.parts_);
509  if (host_relation == DISJOINT_ORDER_PRE ||
510      host_relation == DISJOINT_ORDER_POST)
511    return host_relation;
512
513  Relation port_relation = ComparePort(parts_, other.parts_);
514  if (port_relation == DISJOINT_ORDER_PRE ||
515      port_relation == DISJOINT_ORDER_POST)
516    return port_relation;
517
518  Relation scheme_relation = CompareScheme(parts_, other.parts_);
519  if (scheme_relation == DISJOINT_ORDER_PRE ||
520      scheme_relation == DISJOINT_ORDER_POST)
521    return scheme_relation;
522
523  if (host_relation != IDENTITY)
524    return host_relation;
525  if (port_relation != IDENTITY)
526    return port_relation;
527  return scheme_relation;
528}
529
530bool ContentSettingsPattern::operator==(
531    const ContentSettingsPattern& other) const {
532  return Compare(other) == IDENTITY;
533}
534
535bool ContentSettingsPattern::operator!=(
536    const ContentSettingsPattern& other) const {
537  return !(*this == other);
538}
539
540bool ContentSettingsPattern::operator<(
541    const ContentSettingsPattern& other) const {
542  return Compare(other) < 0;
543}
544
545bool ContentSettingsPattern::operator>(
546    const ContentSettingsPattern& other) const {
547  return Compare(other) > 0;
548}
549
550// static
551ContentSettingsPattern::Relation ContentSettingsPattern::CompareHost(
552    const ContentSettingsPattern::PatternParts& parts,
553    const ContentSettingsPattern::PatternParts& other_parts) {
554  if (!parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
555    // Case 1: No host starts with a wild card
556    int result = CompareDomainNames(parts.host, other_parts.host);
557    if (result == 0)
558      return ContentSettingsPattern::IDENTITY;
559    if (result < 0)
560      return ContentSettingsPattern::DISJOINT_ORDER_PRE;
561    return ContentSettingsPattern::DISJOINT_ORDER_POST;
562  } else if (parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
563    // Case 2: |host| starts with a domain wildcard and |other_host| does not
564    // start with a domain wildcard.
565    // Examples:
566    // "this" host:   [*.]google.com
567    // "other" host:  google.com
568    //
569    // [*.]google.com
570    // mail.google.com
571    //
572    // [*.]mail.google.com
573    // google.com
574    //
575    // [*.]youtube.com
576    // google.de
577    //
578    // [*.]youtube.com
579    // mail.google.com
580    //
581    // *
582    // google.de
583    if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
584      return ContentSettingsPattern::SUCCESSOR;
585    } else {
586       if (CompareDomainNames(parts.host, other_parts.host) < 0)
587         return ContentSettingsPattern::DISJOINT_ORDER_PRE;
588       return ContentSettingsPattern::DISJOINT_ORDER_POST;
589    }
590  } else if (!parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
591    // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts
592    // with a domain wildcard.
593    if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
594      return ContentSettingsPattern::PREDECESSOR;
595    } else {
596      if (CompareDomainNames(parts.host, other_parts.host) < 0)
597        return ContentSettingsPattern::DISJOINT_ORDER_PRE;
598      return ContentSettingsPattern::DISJOINT_ORDER_POST;
599    }
600  } else if (parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
601    // Case 4: |host| and |other_host| both start with a domain wildcard.
602    // Examples:
603    // [*.]google.com
604    // [*.]google.com
605    //
606    // [*.]google.com
607    // [*.]mail.google.com
608    //
609    // [*.]youtube.com
610    // [*.]google.de
611    //
612    // [*.]youtube.com
613    // [*.]mail.google.com
614    //
615    // [*.]youtube.com
616    // *
617    //
618    // *
619    // [*.]youtube.com
620    if (parts.host == other_parts.host) {
621      return ContentSettingsPattern::IDENTITY;
622    } else if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
623      return ContentSettingsPattern::SUCCESSOR;
624    } else if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
625      return ContentSettingsPattern::PREDECESSOR;
626    } else {
627      if (CompareDomainNames(parts.host, other_parts.host) < 0)
628        return ContentSettingsPattern::DISJOINT_ORDER_PRE;
629      return ContentSettingsPattern::DISJOINT_ORDER_POST;
630    }
631  }
632
633  NOTREACHED();
634  return ContentSettingsPattern::IDENTITY;
635}
636
637// static
638ContentSettingsPattern::Relation ContentSettingsPattern::CompareScheme(
639    const ContentSettingsPattern::PatternParts& parts,
640    const ContentSettingsPattern::PatternParts& other_parts) {
641  if (parts.is_scheme_wildcard && !other_parts.is_scheme_wildcard)
642    return ContentSettingsPattern::SUCCESSOR;
643  if (!parts.is_scheme_wildcard && other_parts.is_scheme_wildcard)
644    return ContentSettingsPattern::PREDECESSOR;
645
646  int result = parts.scheme.compare(other_parts.scheme);
647  if (result == 0)
648    return ContentSettingsPattern::IDENTITY;
649  if (result > 0)
650    return ContentSettingsPattern::DISJOINT_ORDER_PRE;
651  return ContentSettingsPattern::DISJOINT_ORDER_POST;
652}
653
654// static
655ContentSettingsPattern::Relation ContentSettingsPattern::ComparePort(
656    const ContentSettingsPattern::PatternParts& parts,
657    const ContentSettingsPattern::PatternParts& other_parts) {
658  if (parts.is_port_wildcard && !other_parts.is_port_wildcard)
659    return ContentSettingsPattern::SUCCESSOR;
660  if (!parts.is_port_wildcard && other_parts.is_port_wildcard)
661    return ContentSettingsPattern::PREDECESSOR;
662
663  int result = parts.port.compare(other_parts.port);
664  if (result == 0)
665    return ContentSettingsPattern::IDENTITY;
666  if (result > 0)
667    return ContentSettingsPattern::DISJOINT_ORDER_PRE;
668  return ContentSettingsPattern::DISJOINT_ORDER_POST;
669}
670