1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/common/content_settings_pattern.h"
6
7#include <vector>
8
9#include "base/memory/scoped_ptr.h"
10#include "base/strings/string_split.h"
11#include "base/strings/string_util.h"
12#include "chrome/common/content_settings_pattern_parser.h"
13#include "chrome/common/render_messages.h"
14#include "chrome/common/url_constants.h"
15#include "extensions/common/constants.h"
16#include "ipc/ipc_message_utils.h"
17#include "net/base/dns_util.h"
18#include "net/base/net_util.h"
19#include "url/gurl.h"
20#include "url/url_canon.h"
21
22namespace {
23
24std::string GetDefaultPort(const std::string& scheme) {
25  if (scheme == url::kHttpScheme)
26    return "80";
27  if (scheme == url::kHttpsScheme)
28    return "443";
29  return std::string();
30}
31
// Returns true if |sub_domain| is a sub domain of, or equal to, |domain|.
// E.g. "mail.google.com" is a sub domain of "google.com", but "evilhost.com"
// is not a sub domain of "host.com" because the match does not start at a
// label boundary.
bool IsSubDomainOrEqual(const std::string& sub_domain,
                        const std::string& domain) {
  // The empty string serves as wildcard; every domain is a sub domain of it.
  if (domain.empty())
    return true;

  // |domain| has to be a suffix of |sub_domain|.
  if (sub_domain.length() < domain.length())
    return false;
  const size_t suffix_start = sub_domain.length() - domain.length();
  if (sub_domain.compare(suffix_start, std::string::npos, domain) != 0)
    return false;

  // The suffix must either span the whole string or directly follow a dot.
  return suffix_start == 0 || sub_domain[suffix_start - 1] == '.';
}
49
50// Compares two domain names.
51int CompareDomainNames(const std::string& str1, const std::string& str2) {
52  std::vector<std::string> domain_name1;
53  std::vector<std::string> domain_name2;
54
55  base::SplitString(str1, '.', &domain_name1);
56  base::SplitString(str2, '.', &domain_name2);
57
58  int i1 = domain_name1.size() - 1;
59  int i2 = domain_name2.size() - 1;
60  int rv;
61  while (i1 >= 0 && i2 >= 0) {
62    // domain names are stored in puny code. So it's fine to use the compare
63    // method.
64    rv = domain_name1[i1].compare(domain_name2[i2]);
65    if (rv != 0)
66      return rv;
67    --i1;
68    --i2;
69  }
70
71  if (i1 > i2)
72    return 1;
73
74  if (i1 < i2)
75    return -1;
76
77  // The domain names are identical.
78  return 0;
79}
80
81typedef ContentSettingsPattern::BuilderInterface BuilderInterface;
82
83}  // namespace
84
85// ////////////////////////////////////////////////////////////////////////////
86// ContentSettingsPattern::Builder
87//
88ContentSettingsPattern::Builder::Builder(bool use_legacy_validate)
89    : is_valid_(true),
90      use_legacy_validate_(use_legacy_validate) {}
91
92ContentSettingsPattern::Builder::~Builder() {}
93
94BuilderInterface* ContentSettingsPattern::Builder::WithPort(
95    const std::string& port) {
96  parts_.port = port;
97  parts_.is_port_wildcard = false;
98  return this;
99}
100
101BuilderInterface* ContentSettingsPattern::Builder::WithPortWildcard() {
102  parts_.port = "";
103  parts_.is_port_wildcard = true;
104  return this;
105}
106
107BuilderInterface* ContentSettingsPattern::Builder::WithHost(
108    const std::string& host) {
109  parts_.host = host;
110  return this;
111}
112
113BuilderInterface* ContentSettingsPattern::Builder::WithDomainWildcard() {
114  parts_.has_domain_wildcard = true;
115  return this;
116}
117
118BuilderInterface* ContentSettingsPattern::Builder::WithScheme(
119    const std::string& scheme) {
120  parts_.scheme = scheme;
121  parts_.is_scheme_wildcard = false;
122  return this;
123}
124
125BuilderInterface* ContentSettingsPattern::Builder::WithSchemeWildcard() {
126  parts_.scheme = "";
127  parts_.is_scheme_wildcard = true;
128  return this;
129}
130
131BuilderInterface* ContentSettingsPattern::Builder::WithPath(
132    const std::string& path) {
133  parts_.path = path;
134  parts_.is_path_wildcard = false;
135  return this;
136}
137
138BuilderInterface* ContentSettingsPattern::Builder::WithPathWildcard() {
139  parts_.path = "";
140  parts_.is_path_wildcard = true;
141  return this;
142}
143
144BuilderInterface* ContentSettingsPattern::Builder::Invalid() {
145  is_valid_ = false;
146  return this;
147}
148
149ContentSettingsPattern ContentSettingsPattern::Builder::Build() {
150  if (!is_valid_)
151    return ContentSettingsPattern();
152  if (!Canonicalize(&parts_))
153    return ContentSettingsPattern();
154  if (use_legacy_validate_) {
155    is_valid_ = LegacyValidate(parts_);
156  } else {
157    is_valid_ = Validate(parts_);
158  }
159  if (!is_valid_)
160    return ContentSettingsPattern();
161
162  // A pattern is invalid if canonicalization is not idempotent.
163  // This check is here because it should be checked no matter
164  // use_legacy_validate_ is.
165  PatternParts parts(parts_);
166  if (!Canonicalize(&parts))
167    return ContentSettingsPattern();
168  if (ContentSettingsPattern(parts_, true) !=
169      ContentSettingsPattern(parts, true)) {
170    return ContentSettingsPattern();
171  }
172
173  return ContentSettingsPattern(parts_, is_valid_);
174}
175
176// static
177bool ContentSettingsPattern::Builder::Canonicalize(PatternParts* parts) {
178  // Canonicalize the scheme part.
179  const std::string scheme(StringToLowerASCII(parts->scheme));
180  parts->scheme = scheme;
181
182  if (parts->scheme == std::string(url::kFileScheme) &&
183      !parts->is_path_wildcard) {
184    GURL url(std::string(url::kFileScheme) +
185             std::string(url::kStandardSchemeSeparator) + parts->path);
186    parts->path = url.path();
187  }
188
189  // Canonicalize the host part.
190  const std::string host(parts->host);
191  url::CanonHostInfo host_info;
192  std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
193  if (host_info.IsIPAddress() && parts->has_domain_wildcard)
194    return false;
195  canonicalized_host = net::TrimEndingDot(canonicalized_host);
196
197  parts->host = "";
198  if ((host.find('*') == std::string::npos) &&
199      !canonicalized_host.empty()) {
200    // Valid host.
201    parts->host += canonicalized_host;
202  }
203  return true;
204}
205
206// static
207bool ContentSettingsPattern::Builder::Validate(const PatternParts& parts) {
208  // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}.
209  if ((parts.is_scheme_wildcard && !parts.scheme.empty()) ||
210      (parts.is_port_wildcard && !parts.port.empty())) {
211    NOTREACHED();
212    return false;
213  }
214
215  // file:// URL patterns have an empty host and port.
216  if (parts.scheme == std::string(url::kFileScheme)) {
217    if (parts.has_domain_wildcard || !parts.host.empty() || !parts.port.empty())
218      return false;
219    if (parts.is_path_wildcard)
220      return parts.path.empty();
221    return (!parts.path.empty() &&
222            parts.path != "/" &&
223            parts.path.find("*") == std::string::npos);
224  }
225
226  // If the pattern is for an extension URL test if it is valid.
227  if (parts.scheme == std::string(extensions::kExtensionScheme) &&
228      parts.port.empty() &&
229      !parts.is_port_wildcard) {
230    return true;
231  }
232
233  // Non-file patterns are invalid if either the scheme, host or port part is
234  // empty.
235  if ((parts.scheme.empty() && !parts.is_scheme_wildcard) ||
236      (parts.host.empty() && !parts.has_domain_wildcard) ||
237      (parts.port.empty() && !parts.is_port_wildcard)) {
238    return false;
239  }
240
241  if (parts.host.find("*") != std::string::npos)
242    return false;
243
244  // Test if the scheme is supported or a wildcard.
245  if (!parts.is_scheme_wildcard &&
246      parts.scheme != std::string(url::kHttpScheme) &&
247      parts.scheme != std::string(url::kHttpsScheme)) {
248    return false;
249  }
250  return true;
251}
252
253// static
254bool ContentSettingsPattern::Builder::LegacyValidate(
255    const PatternParts& parts) {
256  // If the pattern is for a "file-pattern" test if it is valid.
257  if (parts.scheme == std::string(url::kFileScheme) &&
258      !parts.is_scheme_wildcard &&
259      parts.host.empty() &&
260      parts.port.empty())
261    return true;
262
263  // If the pattern is for an extension URL test if it is valid.
264  if (parts.scheme == std::string(extensions::kExtensionScheme) &&
265      !parts.is_scheme_wildcard &&
266      !parts.host.empty() &&
267      !parts.has_domain_wildcard &&
268      parts.port.empty() &&
269      !parts.is_port_wildcard)
270    return true;
271
272  // Non-file patterns are invalid if either the scheme, host or port part is
273  // empty.
274  if ((!parts.is_scheme_wildcard) ||
275      (parts.host.empty() && !parts.has_domain_wildcard) ||
276      (!parts.is_port_wildcard))
277    return false;
278
279  // Test if the scheme is supported or a wildcard.
280  if (!parts.is_scheme_wildcard &&
281      parts.scheme != std::string(url::kHttpScheme) &&
282      parts.scheme != std::string(url::kHttpsScheme)) {
283    return false;
284  }
285  return true;
286}
287
288// ////////////////////////////////////////////////////////////////////////////
289// ContentSettingsPattern::PatternParts
290//
291ContentSettingsPattern::PatternParts::PatternParts()
292        : is_scheme_wildcard(false),
293          has_domain_wildcard(false),
294          is_port_wildcard(false),
295          is_path_wildcard(false) {}
296
297ContentSettingsPattern::PatternParts::~PatternParts() {}
298
299// ////////////////////////////////////////////////////////////////////////////
300// ContentSettingsPattern
301//
302
303// The version of the pattern format implemented. Version 1 includes the
304// following patterns:
305//   - [*.]domain.tld (matches domain.tld and all sub-domains)
306//   - host (matches an exact hostname)
307//   - a.b.c.d (matches an exact IPv4 ip)
308//   - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
309//   - file:///tmp/test.html (a complete URL without a host)
310// Version 2 adds a resource identifier for plugins.
311// TODO(jochen): update once this feature is no longer behind a flag.
312const int ContentSettingsPattern::kContentSettingsPatternVersion = 1;
313
314// TODO(markusheintz): These two constants were moved to the Pattern Parser.
315// Remove once the dependency of the ContentSettingsBaseProvider is removed.
316const char* ContentSettingsPattern::kDomainWildcard = "[*.]";
317const size_t ContentSettingsPattern::kDomainWildcardLength = 4;
318
319// static
320BuilderInterface* ContentSettingsPattern::CreateBuilder(
321    bool validate) {
322  return new Builder(validate);
323}
324
325// static
326ContentSettingsPattern ContentSettingsPattern::FromURL(
327    const GURL& url) {
328  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
329      ContentSettingsPattern::CreateBuilder(false));
330
331  const GURL* local_url = &url;
332  if (url.SchemeIsFileSystem() && url.inner_url()) {
333    local_url = url.inner_url();
334  }
335  if (local_url->SchemeIsFile()) {
336    builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
337  } else {
338    // Please keep the order of the ifs below as URLs with an IP as host can
339    // also have a "http" scheme.
340    if (local_url->HostIsIPAddress()) {
341      builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
342    } else if (local_url->SchemeIs(url::kHttpScheme)) {
343      builder->WithSchemeWildcard()->WithDomainWildcard()->WithHost(
344          local_url->host());
345    } else if (local_url->SchemeIs(url::kHttpsScheme)) {
346      builder->WithScheme(local_url->scheme())->WithDomainWildcard()->WithHost(
347          local_url->host());
348    } else {
349      // Unsupported scheme
350    }
351    if (local_url->port().empty()) {
352      if (local_url->SchemeIs(url::kHttpsScheme))
353        builder->WithPort(GetDefaultPort(url::kHttpsScheme));
354      else
355        builder->WithPortWildcard();
356    } else {
357      builder->WithPort(local_url->port());
358    }
359  }
360  return builder->Build();
361}
362
363// static
364ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard(
365    const GURL& url) {
366  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
367      ContentSettingsPattern::CreateBuilder(false));
368
369  const GURL* local_url = &url;
370  if (url.SchemeIsFileSystem() && url.inner_url()) {
371    local_url = url.inner_url();
372  }
373  if (local_url->SchemeIsFile()) {
374    builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
375  } else {
376    builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
377    if (local_url->port().empty()) {
378      builder->WithPort(GetDefaultPort(local_url->scheme()));
379    } else {
380      builder->WithPort(local_url->port());
381    }
382  }
383  return builder->Build();
384}
385
386// static
387ContentSettingsPattern ContentSettingsPattern::FromString(
388    const std::string& pattern_spec) {
389  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
390      ContentSettingsPattern::CreateBuilder(false));
391  content_settings::PatternParser::Parse(pattern_spec, builder.get());
392  return builder->Build();
393}
394
395// static
396ContentSettingsPattern ContentSettingsPattern::LegacyFromString(
397    const std::string& pattern_spec) {
398  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
399      ContentSettingsPattern::CreateBuilder(true));
400  content_settings::PatternParser::Parse(pattern_spec, builder.get());
401  return builder->Build();
402}
403
404// static
405ContentSettingsPattern ContentSettingsPattern::Wildcard() {
406  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
407      ContentSettingsPattern::CreateBuilder(true));
408  builder->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()->
409           WithPathWildcard();
410  return builder->Build();
411}
412
413ContentSettingsPattern::ContentSettingsPattern()
414  : is_valid_(false) {
415}
416
417ContentSettingsPattern::ContentSettingsPattern(
418    const PatternParts& parts,
419    bool valid)
420    : parts_(parts),
421      is_valid_(valid) {
422}
423
424void ContentSettingsPattern::WriteToMessage(IPC::Message* m) const {
425  IPC::WriteParam(m, is_valid_);
426  IPC::WriteParam(m, parts_);
427}
428
429bool ContentSettingsPattern::ReadFromMessage(const IPC::Message* m,
430                                             PickleIterator* iter) {
431  return IPC::ReadParam(m, iter, &is_valid_) &&
432         IPC::ReadParam(m, iter, &parts_);
433}
434
435bool ContentSettingsPattern::Matches(
436    const GURL& url) const {
437  // An invalid pattern matches nothing.
438  if (!is_valid_)
439    return false;
440
441  const GURL* local_url = &url;
442  if (url.SchemeIsFileSystem() && url.inner_url()) {
443    local_url = url.inner_url();
444  }
445
446  // Match the scheme part.
447  const std::string scheme(local_url->scheme());
448  if (!parts_.is_scheme_wildcard &&
449      parts_.scheme != scheme) {
450    return false;
451  }
452
453  // File URLs have no host. Matches if the pattern has the path wildcard set,
454  // or if the path in the URL is identical to the one in the pattern.
455  // For filesystem:file URLs, the path used is the filesystem type, so all
456  // filesystem:file:///temporary/... are equivalent.
457  // TODO(markusheintz): Content settings should be defined for all files on
458  // a machine. Unless there is a good use case for supporting paths for file
459  // patterns, stop supporting path for file patterns.
460  if (!parts_.is_scheme_wildcard && scheme == url::kFileScheme)
461    return parts_.is_path_wildcard ||
462        parts_.path == std::string(local_url->path());
463
464  // Match the host part.
465  const std::string host(net::TrimEndingDot(local_url->host()));
466  if (!parts_.has_domain_wildcard) {
467    if (parts_.host != host)
468      return false;
469  } else {
470    if (!IsSubDomainOrEqual(host, parts_.host))
471      return false;
472  }
473
474  // For chrome extensions URLs ignore the port.
475  if (parts_.scheme == std::string(extensions::kExtensionScheme))
476    return true;
477
478  // Match the port part.
479  std::string port(local_url->port());
480
481  // Use the default port if the port string is empty. GURL returns an empty
482  // string if no port at all was specified or if the default port was
483  // specified.
484  if (port.empty()) {
485    port = GetDefaultPort(scheme);
486  }
487
488  if (!parts_.is_port_wildcard &&
489      parts_.port != port ) {
490    return false;
491  }
492
493  return true;
494}
495
496bool ContentSettingsPattern::MatchesAllHosts() const {
497  return parts_.has_domain_wildcard && parts_.host.empty();
498}
499
500const std::string ContentSettingsPattern::ToString() const {
501  if (IsValid())
502    return content_settings::PatternParser::ToString(parts_);
503  else
504    return std::string();
505}
506
507ContentSettingsPattern::Relation ContentSettingsPattern::Compare(
508    const ContentSettingsPattern& other) const {
509  // Two invalid patterns are identical in the way they behave. They don't match
510  // anything and are represented as an empty string. So it's fair to treat them
511  // as identical.
512  if ((this == &other) ||
513      (!is_valid_ && !other.is_valid_))
514    return IDENTITY;
515
516  if (!is_valid_ && other.is_valid_)
517    return DISJOINT_ORDER_POST;
518  if (is_valid_ && !other.is_valid_)
519    return DISJOINT_ORDER_PRE;
520
521  // If either host, port or scheme are disjoint return immediately.
522  Relation host_relation = CompareHost(parts_, other.parts_);
523  if (host_relation == DISJOINT_ORDER_PRE ||
524      host_relation == DISJOINT_ORDER_POST)
525    return host_relation;
526
527  Relation port_relation = ComparePort(parts_, other.parts_);
528  if (port_relation == DISJOINT_ORDER_PRE ||
529      port_relation == DISJOINT_ORDER_POST)
530    return port_relation;
531
532  Relation scheme_relation = CompareScheme(parts_, other.parts_);
533  if (scheme_relation == DISJOINT_ORDER_PRE ||
534      scheme_relation == DISJOINT_ORDER_POST)
535    return scheme_relation;
536
537  if (host_relation != IDENTITY)
538    return host_relation;
539  if (port_relation != IDENTITY)
540    return port_relation;
541  return scheme_relation;
542}
543
544bool ContentSettingsPattern::operator==(
545    const ContentSettingsPattern& other) const {
546  return Compare(other) == IDENTITY;
547}
548
549bool ContentSettingsPattern::operator!=(
550    const ContentSettingsPattern& other) const {
551  return !(*this == other);
552}
553
554bool ContentSettingsPattern::operator<(
555    const ContentSettingsPattern& other) const {
556  return Compare(other) < 0;
557}
558
559bool ContentSettingsPattern::operator>(
560    const ContentSettingsPattern& other) const {
561  return Compare(other) > 0;
562}
563
564// static
565ContentSettingsPattern::Relation ContentSettingsPattern::CompareHost(
566    const ContentSettingsPattern::PatternParts& parts,
567    const ContentSettingsPattern::PatternParts& other_parts) {
568  if (!parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
569    // Case 1: No host starts with a wild card
570    int result = CompareDomainNames(parts.host, other_parts.host);
571    if (result == 0)
572      return ContentSettingsPattern::IDENTITY;
573    if (result < 0)
574      return ContentSettingsPattern::DISJOINT_ORDER_PRE;
575    return ContentSettingsPattern::DISJOINT_ORDER_POST;
576  } else if (parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
577    // Case 2: |host| starts with a domain wildcard and |other_host| does not
578    // start with a domain wildcard.
579    // Examples:
580    // "this" host:   [*.]google.com
581    // "other" host:  google.com
582    //
583    // [*.]google.com
584    // mail.google.com
585    //
586    // [*.]mail.google.com
587    // google.com
588    //
589    // [*.]youtube.com
590    // google.de
591    //
592    // [*.]youtube.com
593    // mail.google.com
594    //
595    // *
596    // google.de
597    if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
598      return ContentSettingsPattern::SUCCESSOR;
599    } else {
600       if (CompareDomainNames(parts.host, other_parts.host) < 0)
601         return ContentSettingsPattern::DISJOINT_ORDER_PRE;
602       return ContentSettingsPattern::DISJOINT_ORDER_POST;
603    }
604  } else if (!parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
605    // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts
606    // with a domain wildcard.
607    if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
608      return ContentSettingsPattern::PREDECESSOR;
609    } else {
610      if (CompareDomainNames(parts.host, other_parts.host) < 0)
611        return ContentSettingsPattern::DISJOINT_ORDER_PRE;
612      return ContentSettingsPattern::DISJOINT_ORDER_POST;
613    }
614  } else if (parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
615    // Case 4: |host| and |other_host| both start with a domain wildcard.
616    // Examples:
617    // [*.]google.com
618    // [*.]google.com
619    //
620    // [*.]google.com
621    // [*.]mail.google.com
622    //
623    // [*.]youtube.com
624    // [*.]google.de
625    //
626    // [*.]youtube.com
627    // [*.]mail.google.com
628    //
629    // [*.]youtube.com
630    // *
631    //
632    // *
633    // [*.]youtube.com
634    if (parts.host == other_parts.host) {
635      return ContentSettingsPattern::IDENTITY;
636    } else if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
637      return ContentSettingsPattern::SUCCESSOR;
638    } else if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
639      return ContentSettingsPattern::PREDECESSOR;
640    } else {
641      if (CompareDomainNames(parts.host, other_parts.host) < 0)
642        return ContentSettingsPattern::DISJOINT_ORDER_PRE;
643      return ContentSettingsPattern::DISJOINT_ORDER_POST;
644    }
645  }
646
647  NOTREACHED();
648  return ContentSettingsPattern::IDENTITY;
649}
650
651// static
652ContentSettingsPattern::Relation ContentSettingsPattern::CompareScheme(
653    const ContentSettingsPattern::PatternParts& parts,
654    const ContentSettingsPattern::PatternParts& other_parts) {
655  if (parts.is_scheme_wildcard && !other_parts.is_scheme_wildcard)
656    return ContentSettingsPattern::SUCCESSOR;
657  if (!parts.is_scheme_wildcard && other_parts.is_scheme_wildcard)
658    return ContentSettingsPattern::PREDECESSOR;
659
660  int result = parts.scheme.compare(other_parts.scheme);
661  if (result == 0)
662    return ContentSettingsPattern::IDENTITY;
663  if (result > 0)
664    return ContentSettingsPattern::DISJOINT_ORDER_PRE;
665  return ContentSettingsPattern::DISJOINT_ORDER_POST;
666}
667
668// static
669ContentSettingsPattern::Relation ContentSettingsPattern::ComparePort(
670    const ContentSettingsPattern::PatternParts& parts,
671    const ContentSettingsPattern::PatternParts& other_parts) {
672  if (parts.is_port_wildcard && !other_parts.is_port_wildcard)
673    return ContentSettingsPattern::SUCCESSOR;
674  if (!parts.is_port_wildcard && other_parts.is_port_wildcard)
675    return ContentSettingsPattern::PREDECESSOR;
676
677  int result = parts.port.compare(other_parts.port);
678  if (result == 0)
679    return ContentSettingsPattern::IDENTITY;
680  if (result > 0)
681    return ContentSettingsPattern::DISJOINT_ORDER_PRE;
682  return ContentSettingsPattern::DISJOINT_ORDER_POST;
683}
684