1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott/* ***** BEGIN LICENSE BLOCK *****
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The contents of this file are subject to the Mozilla Public License Version
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 1.1 (the "License"); you may not use this file except in compliance with
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the License. You may obtain a copy of the License at
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * http://www.mozilla.org/MPL/
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Software distributed under the License is distributed on an "AS IS" basis,
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * for the specific language governing rights and limitations under the
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * License.
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The Original Code is Mozilla TLD Service
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The Initial Developer of the Original Code is
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Google Inc.
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Portions created by the Initial Developer are Copyright (C) 2006
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the Initial Developer. All Rights Reserved.
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Contributor(s):
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *   Pamela Greene <pamg.bugs@gmail.com> (original author)
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Alternatively, the contents of this file may be used under the terms of
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * either the GNU General Public License Version 2 or later (the "GPL"), or
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * in which case the provisions of the GPL or the LGPL are applicable instead
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * of those above. If you wish to allow use of your version of this file only
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * under the terms of either the GPL or the LGPL, and not to allow others to
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * use your version of this file under the terms of the MPL, indicate your
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * decision by deleting the provisions above and replace them with the notice
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * and other provisions required by the GPL or the LGPL. If you do not delete
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the provisions above, a recipient may use your version of this file under
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the terms of any one of the MPL, the GPL or the LGPL.
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * ***** END LICENSE BLOCK ***** */
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// NB: Modelled after Mozilla's code (originally written by Pamela Greene,
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// later modified by others), but almost entirely rewritten for Chrome.
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott/*
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  (Documentation based on the Mozilla documentation currently at
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  http://wiki.mozilla.org/Gecko:Effective_TLD_Service, written by the same
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  author.)
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  The RegistryControlledDomainService examines the hostname of a GURL passed to
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  it and determines the longest portion that is controlled by a registrar.
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  Although technically the top-level domain (TLD) for a hostname is the last
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  dot-portion of the name (such as .com or .org), many domains (such as co.uk)
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  function as though they were TLDs, allocating any number of more specific,
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  essentially unrelated names beneath them.  For example, .uk is a TLD, but
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  nobody is allowed to register a domain directly under .uk; the "effective"
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  TLDs are ac.uk, co.uk, and so on.  We wouldn't want to allow any site in
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  *.co.uk to set a cookie for the entire co.uk domain, so it's important to be
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  able to identify which higher-level domains function as effective TLDs and
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  which can be registered.
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  The service obtains its information about effective TLDs from a text resource
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  that must be in the following format:
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * It should use plain ASCII.
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * It should contain one domain rule per line, terminated with \n, with nothing
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    else on the line.  (The last rule in the file may omit the ending \n.)
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * Rules should have been normalized using the same canonicalization that GURL
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    applies.  For ASCII, that means they're not case-sensitive, among other
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    things; other normalizations are applied for other characters.
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * Each rule should list the entire TLD-like domain name, with any subdomain
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    portions separated by dots (.) as usual.
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * Rules should neither begin nor end with a dot.
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * If a hostname matches more than one rule, the most specific rule (that is,
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    the one with more dot-levels) will be used.
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * Other than in the case of wildcards (see below), rules do not implicitly
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    include their subcomponents.  For example, "bar.baz.uk" does not imply
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    "baz.uk", and if "bar.baz.uk" is the only rule in the list, "foo.bar.baz.uk"
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    will match, but "baz.uk" and "qux.baz.uk" won't.
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * The wildcard character '*' will match any valid sequence of characters.
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * Wildcards may only appear as the entire most specific level of a rule.  That
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    is, a wildcard must come at the beginning of a line and must be followed by
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    a dot.  (You may not use a wildcard as the entire rule.)
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * A wildcard rule implies a rule for the entire non-wildcard portion.  For
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    example, the rule "*.foo.bar" implies the rule "foo.bar" (but not the rule
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    "bar").  This is typically important in the case of exceptions (see below).
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * The exception character '!' before a rule marks an exception to a wildcard
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    rule.  If your rules are "*.tokyo.jp" and "!pref.tokyo.jp", then
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    "a.b.tokyo.jp" has an effective TLD of "b.tokyo.jp", but "a.pref.tokyo.jp"
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    has an effective TLD of "tokyo.jp" (the exception prevents the wildcard
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    match, and we thus fall through to matching on the implied "tokyo.jp" rule
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    from the wildcard).
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  * If you use an exception rule without a corresponding wildcard rule, the
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    behavior is undefined.
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
  Firefox has a very similar service, and it's their data file we use to
  construct our resource.  However, the data expected by this implementation
  differs from the Mozilla file in several important ways:
   (1) We require that all single-level TLDs (com, edu, etc.) be explicitly
       listed.  As of this writing, Mozilla's file includes the single-level
       TLDs too, but that might change.
   (2) Our data is expected to be in pure ASCII: all UTF-8 or otherwise encoded
       items must already have been normalized.
   (3) We do not allow comments, rule notes, blank lines, or line endings other
       than LF.
  Rules are also expected to be syntactically valid.
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  The utility application tld_cleanup.exe converts a Mozilla-style file into a
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  Chrome one, making sure that single-level TLDs are explicitly listed, using
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  GURL to normalize rules, and validating the rules.
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott*/
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H_
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H_
1123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <string>
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/basictypes.h"
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Forward declarations.  This header only names these types (as a reference
// parameter, a friend, and a pointer return type respectively), so their full
// definitions are not required here.
class GURL;

template <typename T>
struct DefaultSingletonTraits;
struct DomainRule;
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace net {
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstruct RegistryControlledDomainServiceSingletonTraits;
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This class is a singleton.
129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass RegistryControlledDomainService {
130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott   ~RegistryControlledDomainService() { }
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Returns the registered, organization-identifying host and all its registry
134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // information, but no subdomains, from the given GURL.  Returns an empty
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // string if the GURL is invalid, has no host (e.g. a file: URL), has multiple
136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // trailing dots, is an IP address, has only one subcomponent (i.e. no dots
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // other than leading/trailing ones), or is itself a recognized registry
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // identifier.  If no matching rule is found in the effective-TLD data (or in
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the default data, if the resource failed to load), the last subcomponent of
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the host is assumed to be the registry.
141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Examples:
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://www.google.com/file.html -> "google.com"  (com)
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://..google.com/file.html   -> "google.com"  (com)
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://google.com./file.html    -> "google.com." (com)
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://a.b.co.uk/file.html      -> "b.co.uk"     (co.uk)
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   file:///C:/bar.html             -> ""            (no host)
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://foo.com../file.html      -> ""            (multiple trailing dots)
149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://192.168.0.1/file.html    -> ""            (IP address)
150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://bar/file.html            -> ""            (no subcomponents)
151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://co.uk/file.html          -> ""            (host is a registry)
152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://foo.bar/file.html        -> "foo.bar"     (no rule; assume bar)
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static std::string GetDomainAndRegistry(const GURL& gurl);
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Like the GURL version, but takes a host (which is canonicalized internally)
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // instead of a full GURL.
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static std::string GetDomainAndRegistry(const std::string& host);
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static std::string GetDomainAndRegistry(const std::wstring& host);
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This convenience function returns true if the two GURLs both have hosts
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // and one of the following is true:
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // * They each have a known domain and registry, and it is the same for both
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   URLs.  Note that this means the trailing dot, if any, must match too.
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // * They don't have known domains/registries, but the hosts are identical.
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Effectively, callers can use this function to check whether the input URLs
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // represent hosts "on the same site".
167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static bool SameDomainOrHost(const GURL& gurl1, const GURL& gurl2);
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Finds the length in bytes of the registrar portion of the host in the
170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // given GURL.  Returns std::string::npos if the GURL is invalid or has no
171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // host (e.g. a file: URL).  Returns 0 if the GURL has multiple trailing dots,
172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // is an IP address, has no subcomponents, or is itself a recognized registry
173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // identifier.  If no matching rule is found in the effective-TLD data (or in
174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the default data, if the resource failed to load), returns 0 if
175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // |allow_unknown_registries| is false, or the length of the last subcomponent
176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // if |allow_unknown_registries| is true.
177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Examples:
179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://www.google.com/file.html -> 3                 (com)
180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://..google.com/file.html   -> 3                 (com)
181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://google.com./file.html    -> 4                 (com)
182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://a.b.co.uk/file.html      -> 5                 (co.uk)
183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   file:///C:/bar.html             -> std::string::npos (no host)
184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://foo.com../file.html      -> 0                 (multiple trailing
185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //                                                         dots)
186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://192.168.0.1/file.html    -> 0                 (IP address)
187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://bar/file.html            -> 0                 (no subcomponents)
188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://co.uk/file.html          -> 0                 (host is a registry)
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   http://foo.bar/file.html        -> 0 or 3, depending (no rule; assume
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //                                                         bar)
191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static size_t GetRegistryLength(const GURL& gurl,
192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                  bool allow_unknown_registries);
193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Like the GURL version, but takes a host (which is canonicalized internally)
195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // instead of a full GURL.
196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static size_t GetRegistryLength(const std::string& host,
197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                  bool allow_unknown_registries);
198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static size_t GetRegistryLength(const std::wstring& host,
199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                  bool allow_unknown_registries);
200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
20121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  // Returns the singleton instance, after attempting to initialize it.
20221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  // NOTE that if the effective-TLD data resource can't be found, the instance
20321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  // will be initialized and continue operation with simple default TLD data.
20421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  static RegistryControlledDomainService* GetInstance();
20521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott protected:
2073f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen  typedef const struct DomainRule* (*FindDomainPtr)(const char *, unsigned int);
2083f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen
209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The entire protected API is only for unit testing.  I mean it.  Don't make
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // me come over there!
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  RegistryControlledDomainService();
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Set the RegistryControledDomainService instance to be used internally.
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // |instance| will supersede the singleton instance normally used.  If
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // |instance| is NULL, normal behavior is restored, and internal operations
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // will return to using the singleton.  This function always returns the
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // instance set by the most recent call to SetInstance.
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static RegistryControlledDomainService* SetInstance(
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      RegistryControlledDomainService* instance);
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Used for unit tests, so that a different perfect hash map from the full
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // list is used.
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void UseFindDomainFunction(FindDomainPtr function);
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // To allow construction of the internal singleton instance.
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  friend struct DefaultSingletonTraits<RegistryControlledDomainService>;
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Internal workings of the static public methods.  See above.
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static std::string GetDomainAndRegistryImpl(const std::string& host);
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t GetRegistryLengthImpl(const std::string& host,
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                               bool allow_unknown_registries);
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Function that returns a DomainRule given a domain.
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  FindDomainPtr find_domain_function_;
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DISALLOW_COPY_AND_ASSIGN(RegistryControlledDomainService);
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace net
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // NET_BASE_REGISTRY_CONTROLLED_DOMAIN_H_
243