content_settings_pattern.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/common/content_settings_pattern.h"
6
7#include <vector>
8
9#include "base/memory/scoped_ptr.h"
10#include "base/string_split.h"
11#include "base/string_util.h"
12#include "chrome/common/content_settings_pattern_parser.h"
13#include "chrome/common/render_messages.h"
14#include "chrome/common/url_constants.h"
15#include "googleurl/src/gurl.h"
16#include "googleurl/src/url_canon.h"
17#include "ipc/ipc_message_utils.h"
18#include "net/base/dns_util.h"
19#include "net/base/net_util.h"
20
21namespace {
22
23std::string GetDefaultPort(const std::string& scheme) {
24  if (scheme == chrome::kHttpScheme)
25    return "80";
26  if (scheme == chrome::kHttpsScheme)
27    return "443";
28  return "";
29}
30
// Returns true if |sub_domain| equals |domain| or is a sub domain of it, i.e.
// |sub_domain| ends with |domain| and the match starts either at the very
// beginning or right after a '.'. E.g. "mail.google.com" is a sub domain of
// "google.com", but "evilhost.com" is not a sub domain of "host.com".
// An empty |domain| acts as a wildcard: every domain is a sub domain of it.
bool IsSubDomainOrEqual(const std::string& sub_domain,
                        const std::string& domain) {
  if (domain.empty())
    return true;

  // |domain| can only be a suffix if it is not longer than |sub_domain|.
  if (domain.length() > sub_domain.length())
    return false;

  const size_t suffix_start = sub_domain.length() - domain.length();
  if (sub_domain.compare(suffix_start, std::string::npos, domain) != 0)
    return false;

  // The suffix has to begin on a label boundary.
  return suffix_start == 0 || sub_domain[suffix_start - 1] == '.';
}
48
49// Compares two domain names.
50int CompareDomainNames(const std::string& str1, const std::string& str2) {
51  std::vector<std::string> domain_name1;
52  std::vector<std::string> domain_name2;
53
54  base::SplitString(str1, '.', &domain_name1);
55  base::SplitString(str2, '.', &domain_name2);
56
57  int i1 = domain_name1.size() - 1;
58  int i2 = domain_name2.size() - 1;
59  int rv;
60  while (i1 >= 0 && i2 >= 0) {
61    // domain names are stored in puny code. So it's fine to use the compare
62    // method.
63    rv = domain_name1[i1].compare(domain_name2[i2]);
64    if (rv != 0)
65      return rv;
66    --i1;
67    --i2;
68  }
69
70  if (i1 > i2)
71    return 1;
72
73  if (i1 < i2)
74    return -1;
75
76  // The domain names are identical.
77  return 0;
78}
79
80typedef ContentSettingsPattern::BuilderInterface BuilderInterface;
81
82}  // namespace
83
84// ////////////////////////////////////////////////////////////////////////////
85// ContentSettingsPattern::Builder
86//
87ContentSettingsPattern::Builder::Builder(bool use_legacy_validate)
88    : is_valid_(true),
89      use_legacy_validate_(use_legacy_validate) {}
90
91ContentSettingsPattern::Builder::~Builder() {}
92
93BuilderInterface* ContentSettingsPattern::Builder::WithPort(
94    const std::string& port) {
95  parts_.port = port;
96  parts_.is_port_wildcard = false;
97  return this;
98}
99
100BuilderInterface* ContentSettingsPattern::Builder::WithPortWildcard() {
101  parts_.port = "";
102  parts_.is_port_wildcard = true;
103  return this;
104}
105
106BuilderInterface* ContentSettingsPattern::Builder::WithHost(
107    const std::string& host) {
108  parts_.host = host;
109  return this;
110}
111
112BuilderInterface* ContentSettingsPattern::Builder::WithDomainWildcard() {
113  parts_.has_domain_wildcard = true;
114  return this;
115}
116
117BuilderInterface* ContentSettingsPattern::Builder::WithScheme(
118    const std::string& scheme) {
119  parts_.scheme = scheme;
120  parts_.is_scheme_wildcard = false;
121  return this;
122}
123
124BuilderInterface* ContentSettingsPattern::Builder::WithSchemeWildcard() {
125  parts_.scheme = "";
126  parts_.is_scheme_wildcard = true;
127  return this;
128}
129
130BuilderInterface* ContentSettingsPattern::Builder::WithPath(
131    const std::string& path) {
132  parts_.path = path;
133  parts_.is_path_wildcard = false;
134  return this;
135}
136
137BuilderInterface* ContentSettingsPattern::Builder::WithPathWildcard() {
138  parts_.path = "";
139  parts_.is_path_wildcard = true;
140  return this;
141}
142
143BuilderInterface* ContentSettingsPattern::Builder::Invalid() {
144  is_valid_ = false;
145  return this;
146}
147
148ContentSettingsPattern ContentSettingsPattern::Builder::Build() {
149  if (!is_valid_)
150    return ContentSettingsPattern();
151  if (!Canonicalize(&parts_))
152    return ContentSettingsPattern();
153  if (use_legacy_validate_) {
154    is_valid_ = LegacyValidate(parts_);
155  } else {
156    is_valid_ = Validate(parts_);
157  }
158  return ContentSettingsPattern(parts_, is_valid_);
159}
160
161// static
162bool ContentSettingsPattern::Builder::Canonicalize(PatternParts* parts) {
163  // Canonicalize the scheme part.
164  const std::string scheme(StringToLowerASCII(parts->scheme));
165  parts->scheme = scheme;
166
167  if (parts->scheme == std::string(chrome::kFileScheme) &&
168      !parts->is_path_wildcard) {
169      GURL url(std::string(chrome::kFileScheme) +
170               std::string(content::kStandardSchemeSeparator) + parts->path);
171      parts->path = url.path();
172  }
173
174  // Canonicalize the host part.
175  const std::string host(parts->host);
176  url_canon::CanonHostInfo host_info;
177  std::string canonicalized_host(net::CanonicalizeHost(host, &host_info));
178  if (host_info.IsIPAddress() && parts->has_domain_wildcard)
179    return false;
180  canonicalized_host = net::TrimEndingDot(canonicalized_host);
181
182  parts->host = "";
183  if ((host.find('*') == std::string::npos) &&
184      !canonicalized_host.empty()) {
185    // Valid host.
186    parts->host += canonicalized_host;
187  }
188  return true;
189}
190
191// static
192bool ContentSettingsPattern::Builder::Validate(const PatternParts& parts) {
193  // Sanity checks first: {scheme, port} wildcards imply empty {scheme, port}.
194  if ((parts.is_scheme_wildcard && !parts.scheme.empty()) ||
195      (parts.is_port_wildcard && !parts.port.empty())) {
196    NOTREACHED();
197    return false;
198  }
199
200  // file:// URL patterns have an empty host and port.
201  if (parts.scheme == std::string(chrome::kFileScheme)) {
202    if (parts.has_domain_wildcard || !parts.host.empty() || !parts.port.empty())
203      return false;
204    if (parts.is_path_wildcard)
205      return parts.path.empty();
206    return (!parts.path.empty() &&
207            parts.path != "/" &&
208            parts.path.find("*") == std::string::npos);
209  }
210
211  // If the pattern is for an extension URL test if it is valid.
212  if (parts.scheme == std::string(chrome::kExtensionScheme) &&
213      parts.port.empty() &&
214      !parts.is_port_wildcard) {
215    return true;
216  }
217
218  // Non-file patterns are invalid if either the scheme, host or port part is
219  // empty.
220  if ((parts.scheme.empty() && !parts.is_scheme_wildcard) ||
221      (parts.host.empty() && !parts.has_domain_wildcard) ||
222      (parts.port.empty() && !parts.is_port_wildcard)) {
223    return false;
224  }
225
226  if (parts.host.find("*") != std::string::npos)
227    return false;
228
229  // Test if the scheme is supported or a wildcard.
230  if (!parts.is_scheme_wildcard &&
231      parts.scheme != std::string(chrome::kHttpScheme) &&
232      parts.scheme != std::string(chrome::kHttpsScheme)) {
233    return false;
234  }
235  return true;
236}
237
238// static
239bool ContentSettingsPattern::Builder::LegacyValidate(
240    const PatternParts& parts) {
241  // If the pattern is for a "file-pattern" test if it is valid.
242  if (parts.scheme == std::string(chrome::kFileScheme) &&
243      !parts.is_scheme_wildcard &&
244      parts.host.empty() &&
245      parts.port.empty())
246    return true;
247
248  // If the pattern is for an extension URL test if it is valid.
249  if (parts.scheme == std::string(chrome::kExtensionScheme) &&
250      !parts.is_scheme_wildcard &&
251      !parts.host.empty() &&
252      !parts.has_domain_wildcard &&
253      parts.port.empty() &&
254      !parts.is_port_wildcard)
255    return true;
256
257  // Non-file patterns are invalid if either the scheme, host or port part is
258  // empty.
259  if ((!parts.is_scheme_wildcard) ||
260      (parts.host.empty() && !parts.has_domain_wildcard) ||
261      (!parts.is_port_wildcard))
262    return false;
263
264  // Test if the scheme is supported or a wildcard.
265  if (!parts.is_scheme_wildcard &&
266      parts.scheme != std::string(chrome::kHttpScheme) &&
267      parts.scheme != std::string(chrome::kHttpsScheme)) {
268    return false;
269  }
270  return true;
271}
272
273// ////////////////////////////////////////////////////////////////////////////
274// ContentSettingsPattern::PatternParts
275//
276ContentSettingsPattern::PatternParts::PatternParts()
277        : is_scheme_wildcard(false),
278          has_domain_wildcard(false),
279          is_port_wildcard(false),
280          is_path_wildcard(false) {}
281
282ContentSettingsPattern::PatternParts::~PatternParts() {}
283
284// ////////////////////////////////////////////////////////////////////////////
285// ContentSettingsPattern
286//
287
288// The version of the pattern format implemented. Version 1 includes the
289// following patterns:
290//   - [*.]domain.tld (matches domain.tld and all sub-domains)
291//   - host (matches an exact hostname)
292//   - a.b.c.d (matches an exact IPv4 ip)
293//   - [a:b:c:d:e:f:g:h] (matches an exact IPv6 ip)
294//   - file:///tmp/test.html (a complete URL without a host)
295// Version 2 adds a resource identifier for plugins.
296// TODO(jochen): update once this feature is no longer behind a flag.
297const int ContentSettingsPattern::kContentSettingsPatternVersion = 1;
298
299// TODO(markusheintz): These two constants were moved to the Pattern Parser.
300// Remove once the dependency of the ContentSettingsBaseProvider is removed.
301const char* ContentSettingsPattern::kDomainWildcard = "[*.]";
302const size_t ContentSettingsPattern::kDomainWildcardLength = 4;
303
304// static
305BuilderInterface* ContentSettingsPattern::CreateBuilder(
306    bool validate) {
307  return new Builder(validate);
308}
309
310// static
311ContentSettingsPattern ContentSettingsPattern::FromURL(
312    const GURL& url) {
313  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
314      ContentSettingsPattern::CreateBuilder(false));
315
316  const GURL* local_url = &url;
317  if (url.SchemeIsFileSystem() && url.inner_url()) {
318    local_url = url.inner_url();
319  }
320  if (local_url->SchemeIsFile()) {
321    builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
322  } else {
323    // Please keep the order of the ifs below as URLs with an IP as host can
324    // also have a "http" scheme.
325    if (local_url->HostIsIPAddress()) {
326      builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
327    } else if (local_url->SchemeIs(chrome::kHttpScheme)) {
328      builder->WithSchemeWildcard()->WithDomainWildcard()->WithHost(
329          local_url->host());
330    } else if (local_url->SchemeIs(chrome::kHttpsScheme)) {
331      builder->WithScheme(local_url->scheme())->WithDomainWildcard()->WithHost(
332          local_url->host());
333    } else {
334      // Unsupported scheme
335    }
336    if (local_url->port().empty()) {
337      if (local_url->SchemeIs(chrome::kHttpsScheme))
338        builder->WithPort(GetDefaultPort(chrome::kHttpsScheme));
339      else
340        builder->WithPortWildcard();
341    } else {
342      builder->WithPort(local_url->port());
343    }
344  }
345  return builder->Build();
346}
347
348// static
349ContentSettingsPattern ContentSettingsPattern::FromURLNoWildcard(
350    const GURL& url) {
351  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
352      ContentSettingsPattern::CreateBuilder(false));
353
354  const GURL* local_url = &url;
355  if (url.SchemeIsFileSystem() && url.inner_url()) {
356    local_url = url.inner_url();
357  }
358  if (local_url->SchemeIsFile()) {
359    builder->WithScheme(local_url->scheme())->WithPath(local_url->path());
360  } else {
361    builder->WithScheme(local_url->scheme())->WithHost(local_url->host());
362    if (local_url->port().empty()) {
363      builder->WithPort(GetDefaultPort(local_url->scheme()));
364    } else {
365      builder->WithPort(local_url->port());
366    }
367  }
368  return builder->Build();
369}
370
371// static
372ContentSettingsPattern ContentSettingsPattern::FromString(
373    const std::string& pattern_spec) {
374  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
375      ContentSettingsPattern::CreateBuilder(false));
376  content_settings::PatternParser::Parse(pattern_spec, builder.get());
377  return builder->Build();
378}
379
380// static
381ContentSettingsPattern ContentSettingsPattern::LegacyFromString(
382    const std::string& pattern_spec) {
383  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
384      ContentSettingsPattern::CreateBuilder(true));
385  content_settings::PatternParser::Parse(pattern_spec, builder.get());
386  return builder->Build();
387}
388
389// static
390ContentSettingsPattern ContentSettingsPattern::Wildcard() {
391  scoped_ptr<ContentSettingsPattern::BuilderInterface> builder(
392      ContentSettingsPattern::CreateBuilder(true));
393  builder->WithSchemeWildcard()->WithDomainWildcard()->WithPortWildcard()->
394           WithPathWildcard();
395  return builder->Build();
396}
397
398ContentSettingsPattern::ContentSettingsPattern()
399  : is_valid_(false) {
400}
401
402ContentSettingsPattern::ContentSettingsPattern(
403    const PatternParts& parts,
404    bool valid)
405    : parts_(parts),
406      is_valid_(valid) {
407}
408
409void ContentSettingsPattern::WriteToMessage(IPC::Message* m) const {
410  IPC::WriteParam(m, is_valid_);
411  IPC::WriteParam(m, parts_);
412}
413
414bool ContentSettingsPattern::ReadFromMessage(const IPC::Message* m,
415                                             PickleIterator* iter) {
416  return IPC::ReadParam(m, iter, &is_valid_) &&
417         IPC::ReadParam(m, iter, &parts_);
418}
419
420bool ContentSettingsPattern::Matches(
421    const GURL& url) const {
422  // An invalid pattern matches nothing.
423  if (!is_valid_)
424    return false;
425
426  const GURL* local_url = &url;
427  if (url.SchemeIsFileSystem() && url.inner_url()) {
428    local_url = url.inner_url();
429  }
430
431  // Match the scheme part.
432  const std::string scheme(local_url->scheme());
433  if (!parts_.is_scheme_wildcard &&
434      parts_.scheme != scheme) {
435    return false;
436  }
437
438  // File URLs have no host. Matches if the pattern has the path wildcard set,
439  // or if the path in the URL is identical to the one in the pattern.
440  // For filesystem:file URLs, the path used is the filesystem type, so all
441  // filesystem:file:///temporary/... are equivalent.
442  // TODO(markusheintz): Content settings should be defined for all files on
443  // a machine. Unless there is a good use case for supporting paths for file
444  // patterns, stop supporting path for file patterns.
445  if (!parts_.is_scheme_wildcard && scheme == chrome::kFileScheme)
446    return parts_.is_path_wildcard ||
447        parts_.path == std::string(local_url->path());
448
449  // Match the host part.
450  const std::string host(net::TrimEndingDot(local_url->host()));
451  if (!parts_.has_domain_wildcard) {
452    if (parts_.host != host)
453      return false;
454  } else {
455    if (!IsSubDomainOrEqual(host, parts_.host))
456      return false;
457  }
458
459  // For chrome extensions URLs ignore the port.
460  if (parts_.scheme == std::string(chrome::kExtensionScheme))
461    return true;
462
463  // Match the port part.
464  std::string port(local_url->port());
465
466  // Use the default port if the port string is empty. GURL returns an empty
467  // string if no port at all was specified or if the default port was
468  // specified.
469  if (port.empty()) {
470    port = GetDefaultPort(scheme);
471  }
472
473  if (!parts_.is_port_wildcard &&
474      parts_.port != port ) {
475    return false;
476  }
477
478  return true;
479}
480
481bool ContentSettingsPattern::MatchesAllHosts() const {
482  return parts_.has_domain_wildcard && parts_.host.empty();
483}
484
485const std::string ContentSettingsPattern::ToString() const {
486  if (IsValid())
487    return content_settings::PatternParser::ToString(parts_);
488  else
489    return "";
490}
491
492ContentSettingsPattern::Relation ContentSettingsPattern::Compare(
493    const ContentSettingsPattern& other) const {
494  // Two invalid patterns are identical in the way they behave. They don't match
495  // anything and are represented as an empty string. So it's fair to treat them
496  // as identical.
497  if ((this == &other) ||
498      (!is_valid_ && !other.is_valid_))
499    return IDENTITY;
500
501  if (!is_valid_ && other.is_valid_)
502    return DISJOINT_ORDER_POST;
503  if (is_valid_ && !other.is_valid_)
504    return DISJOINT_ORDER_PRE;
505
506  // If either host, port or scheme are disjoint return immediately.
507  Relation host_relation = CompareHost(parts_, other.parts_);
508  if (host_relation == DISJOINT_ORDER_PRE ||
509      host_relation == DISJOINT_ORDER_POST)
510    return host_relation;
511
512  Relation port_relation = ComparePort(parts_, other.parts_);
513  if (port_relation == DISJOINT_ORDER_PRE ||
514      port_relation == DISJOINT_ORDER_POST)
515    return port_relation;
516
517  Relation scheme_relation = CompareScheme(parts_, other.parts_);
518  if (scheme_relation == DISJOINT_ORDER_PRE ||
519      scheme_relation == DISJOINT_ORDER_POST)
520    return scheme_relation;
521
522  if (host_relation != IDENTITY)
523    return host_relation;
524  if (port_relation != IDENTITY)
525    return port_relation;
526  return scheme_relation;
527}
528
529bool ContentSettingsPattern::operator==(
530    const ContentSettingsPattern& other) const {
531  return Compare(other) == IDENTITY;
532}
533
534bool ContentSettingsPattern::operator!=(
535    const ContentSettingsPattern& other) const {
536  return !(*this == other);
537}
538
539bool ContentSettingsPattern::operator<(
540    const ContentSettingsPattern& other) const {
541  return Compare(other) < 0;
542}
543
544bool ContentSettingsPattern::operator>(
545    const ContentSettingsPattern& other) const {
546  return Compare(other) > 0;
547}
548
549// static
550ContentSettingsPattern::Relation ContentSettingsPattern::CompareHost(
551    const ContentSettingsPattern::PatternParts& parts,
552    const ContentSettingsPattern::PatternParts& other_parts) {
553  if (!parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
554    // Case 1: No host starts with a wild card
555    int result = CompareDomainNames(parts.host, other_parts.host);
556    if (result == 0)
557      return ContentSettingsPattern::IDENTITY;
558    if (result < 0)
559      return ContentSettingsPattern::DISJOINT_ORDER_PRE;
560    return ContentSettingsPattern::DISJOINT_ORDER_POST;
561  } else if (parts.has_domain_wildcard && !other_parts.has_domain_wildcard) {
562    // Case 2: |host| starts with a domain wildcard and |other_host| does not
563    // start with a domain wildcard.
564    // Examples:
565    // "this" host:   [*.]google.com
566    // "other" host:  google.com
567    //
568    // [*.]google.com
569    // mail.google.com
570    //
571    // [*.]mail.google.com
572    // google.com
573    //
574    // [*.]youtube.com
575    // google.de
576    //
577    // [*.]youtube.com
578    // mail.google.com
579    //
580    // *
581    // google.de
582    if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
583      return ContentSettingsPattern::SUCCESSOR;
584    } else {
585       if (CompareDomainNames(parts.host, other_parts.host) < 0)
586         return ContentSettingsPattern::DISJOINT_ORDER_PRE;
587       return ContentSettingsPattern::DISJOINT_ORDER_POST;
588    }
589  } else if (!parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
590    // Case 3: |host| starts NOT with a domain wildcard and |other_host| starts
591    // with a domain wildcard.
592    if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
593      return ContentSettingsPattern::PREDECESSOR;
594    } else {
595      if (CompareDomainNames(parts.host, other_parts.host) < 0)
596        return ContentSettingsPattern::DISJOINT_ORDER_PRE;
597      return ContentSettingsPattern::DISJOINT_ORDER_POST;
598    }
599  } else if (parts.has_domain_wildcard && other_parts.has_domain_wildcard) {
600    // Case 4: |host| and |other_host| both start with a domain wildcard.
601    // Examples:
602    // [*.]google.com
603    // [*.]google.com
604    //
605    // [*.]google.com
606    // [*.]mail.google.com
607    //
608    // [*.]youtube.com
609    // [*.]google.de
610    //
611    // [*.]youtube.com
612    // [*.]mail.google.com
613    //
614    // [*.]youtube.com
615    // *
616    //
617    // *
618    // [*.]youtube.com
619    if (parts.host == other_parts.host) {
620      return ContentSettingsPattern::IDENTITY;
621    } else if (IsSubDomainOrEqual(other_parts.host, parts.host)) {
622      return ContentSettingsPattern::SUCCESSOR;
623    } else if (IsSubDomainOrEqual(parts.host, other_parts.host)) {
624      return ContentSettingsPattern::PREDECESSOR;
625    } else {
626      if (CompareDomainNames(parts.host, other_parts.host) < 0)
627        return ContentSettingsPattern::DISJOINT_ORDER_PRE;
628      return ContentSettingsPattern::DISJOINT_ORDER_POST;
629    }
630  }
631
632  NOTREACHED();
633  return ContentSettingsPattern::IDENTITY;
634}
635
636// static
637ContentSettingsPattern::Relation ContentSettingsPattern::CompareScheme(
638    const ContentSettingsPattern::PatternParts& parts,
639    const ContentSettingsPattern::PatternParts& other_parts) {
640  if (parts.is_scheme_wildcard && !other_parts.is_scheme_wildcard)
641    return ContentSettingsPattern::SUCCESSOR;
642  if (!parts.is_scheme_wildcard && other_parts.is_scheme_wildcard)
643    return ContentSettingsPattern::PREDECESSOR;
644
645  int result = parts.scheme.compare(other_parts.scheme);
646  if (result == 0)
647    return ContentSettingsPattern::IDENTITY;
648  if (result > 0)
649    return ContentSettingsPattern::DISJOINT_ORDER_PRE;
650  return ContentSettingsPattern::DISJOINT_ORDER_POST;
651}
652
653// static
654ContentSettingsPattern::Relation ContentSettingsPattern::ComparePort(
655    const ContentSettingsPattern::PatternParts& parts,
656    const ContentSettingsPattern::PatternParts& other_parts) {
657  if (parts.is_port_wildcard && !other_parts.is_port_wildcard)
658    return ContentSettingsPattern::SUCCESSOR;
659  if (!parts.is_port_wildcard && other_parts.is_port_wildcard)
660    return ContentSettingsPattern::PREDECESSOR;
661
662  int result = parts.port.compare(other_parts.port);
663  if (result == 0)
664    return ContentSettingsPattern::IDENTITY;
665  if (result > 0)
666    return ContentSettingsPattern::DISJOINT_ORDER_PRE;
667  return ContentSettingsPattern::DISJOINT_ORDER_POST;
668}
669