// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <string.h>
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <vector>
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_util.h"
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/logging.h"
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "googleurl/src/url_canon_internal.h"
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_file.h"
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace url_util {
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace {
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Lower-cases a single character using ASCII rules only. We deliberately
// avoid the standard library's tolower because it is locale sensitive.
// Characters outside 'A'..'Z' are returned unchanged.
template <class Char> inline Char ToLowerASCII(Char c) {
  if (c < 'A' || c > 'Z')
    return c;
  // The arithmetic promotes to int; convert back to the caller's type.
  return static_cast<Char>(c + ('a' - 'A'));
}
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Backend for LowerCaseEqualsASCII. Compares the ASCII-lower-cased range
// [a_begin, a_end) against the null-terminated string |b| and returns true
// only when both have the same length and every character matches. |b| is
// compared as-is, so it is expected to already be lower-case.
template<typename Iter>
inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
  Iter cursor = a_begin;
  while (cursor != a_end) {
    // Hitting the terminator first means |b| is shorter than the range.
    if (*b == 0)
      return false;
    if (ToLowerASCII(*cursor) != *b)
      return false;
    ++cursor;
    ++b;
  }
  // The range is exhausted; it is a match only if |b| is exhausted too.
  return !*b;
}
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Schemes that get dedicated handling in this file.
const char kFileScheme[] = "file";  // Used in a number of places.
const char kMailtoScheme[] = "mailto";

// Built-in table of "standard" schemes. It is only ever read (it seeds the
// runtime list in InitStandardSchemes), so both the strings and the pointers
// to them are const. Keep kNumStandardURLSchemes in sync with the number of
// entries below.
const int kNumStandardURLSchemes = 7;
const char* const kStandardURLSchemes[kNumStandardURLSchemes] = {
  "http",
  "https",
  kFileScheme,  // Yes, file urls can have a hostname!
  "ftp",
  "gopher",
  "ws",  // WebSocket.
  "wss",  // WebSocket secure.
};
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Flag that goes with LockStandardSchemes; see that function's declaration
// in the header for details.
bool standard_schemes_locked = false;

// The standard schemes currently installed. Starts out NULL and is filled in
// lazily by InitStandardSchemes. The vector is intentionally leaked at
// shutdown so that no destructor runs that could slow us down or cause
// problems.
std::vector<const char*>* standard_schemes = NULL;
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Ensures that the standard_schemes list is initialized, does nothing if it
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// already has values.
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid InitStandardSchemes() {
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (standard_schemes)
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  standard_schemes = new std::vector<const char*>;
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = 0; i < kNumStandardURLSchemes; i++)
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    standard_schemes->push_back(kStandardURLSchemes[i]);
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Given a string and a range inside the string, compares it to the given
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// lower-case |compare_to| buffer.
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottinline bool CompareSchemeComponent(const CHAR* spec,
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                   const url_parse::Component& component,
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                   const char* compare_to) {
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (!component.is_nonempty())
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return compare_to[0] == 0;  // When component is empty, match empty scheme.
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return LowerCaseEqualsASCII(&spec[component.begin],
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                              &spec[component.end()],
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                              compare_to);
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Returns true if the given scheme identified by |scheme| within |spec| is one
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// of the registered "standard" schemes.
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) {
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (!scheme.is_nonempty())
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;  // Empty or invalid schemes are non-standard.
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  InitStandardSchemes();
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < standard_schemes->size(); i++) {
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()],
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                             standard_schemes->at(i)))
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return true;
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return false;
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoFindAndCompareScheme(const CHAR* str,
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            int str_len,
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            const char* compare,
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            url_parse::Component* found_scheme) {
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Before extracting scheme, canonicalize the URL to remove any whitespace.
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // This matches the canonicalization done in DoCanonicalize function.
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int spec_len;
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const CHAR* spec = RemoveURLWhitespace(str, str_len,
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                         &whitespace_buffer, &spec_len);
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_parse::Component our_scheme;
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!url_parse::ExtractScheme(spec, spec_len, &our_scheme)) {
134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // No scheme.
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (found_scheme)
136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      *found_scheme = url_parse::Component();
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (found_scheme)
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *found_scheme = our_scheme;
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return CompareSchemeComponent(spec, our_scheme, compare);
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoCanonicalize(const CHAR* in_spec, int in_spec_len,
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    url_canon::CharsetConverter* charset_converter,
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    url_canon::CanonOutput* output,
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    url_parse::Parsed* output_parsed) {
149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Remove any whitespace from the middle of the relative URL, possibly
150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // copying to the new buffer.
151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int spec_len;
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                         &whitespace_buffer, &spec_len);
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_parse::Parsed parsed_input;
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifdef WIN32
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // For Windows, we allow things that look like absolute Windows paths to be
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // fixed up magically to file URLs. This is done for IE compatability. For
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // example, this will change "c:/foo" into a file URL rather than treating
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // There is similar logic in url_canon_relative.cc for
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // has no meaning as an absolute path name. This is because browsers on Mac
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // & Unix don't generally do this, so there is no compatibility reason for
167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // doing so.
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) ||
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {
170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    url_parse::ParseFileURL(spec, spec_len, &parsed_input);
171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,
172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                           charset_converter,
173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                           output, output_parsed);
174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif
176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_parse::Component scheme;
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!url_parse::ExtractScheme(spec, spec_len, &scheme))
179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;
180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This is the parsed version of the input URL, we have to canonicalize it
182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // before storing it in our object.
183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool success;
184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (CompareSchemeComponent(spec, scheme, kFileScheme)) {
185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // File URLs are special.
186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    url_parse::ParseFileURL(spec, spec_len, &parsed_input);
187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,
188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                             charset_converter,
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                             output, output_parsed);
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } else if (DoIsStandard(spec, scheme)) {
192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // All "normal" URLs.
193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    url_parse::ParseStandardURL(spec, spec_len, &parsed_input);
194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input,
195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                                 charset_converter,
196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                                 output, output_parsed);
197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) {
199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Mailto are treated like a standard url with only a scheme, path, query
200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    url_parse::ParseMailtoURL(spec, spec_len, &parsed_input);
201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input,
202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                               output, output_parsed);
203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // "Weird" URLs like data: and javascript:
206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    url_parse::ParsePathURL(spec, spec_len, &parsed_input);
207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    success = url_canon::CanonicalizePathURL(spec, spec_len, parsed_input,
208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                             output, output_parsed);
209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return success;
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoResolveRelative(const char* base_spec,
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       int base_spec_len,
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const url_parse::Parsed& base_parsed,
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const CHAR* in_relative,
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       int in_relative_length,
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_canon::CharsetConverter* charset_converter,
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_canon::CanonOutput* output,
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_parse::Parsed* output_parsed) {
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Remove any whitespace from the middle of the relative URL, possibly
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // copying to the new buffer.
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int relative_length;
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                             &whitespace_buffer,
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                             &relative_length);
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // See if our base URL should be treated as "standard".
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool standard_base_scheme =
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      base_parsed.scheme.is_nonempty() &&
233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      DoIsStandard(base_spec, base_parsed.scheme);
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool is_relative;
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_parse::Component relative_component;
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (!url_canon::IsRelativeURL(base_spec, base_parsed,
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                relative, relative_length,
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                standard_base_scheme,
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                &is_relative,
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                &relative_component)) {
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Error resolving.
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (is_relative) {
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Relative, resolve and canonicalize.
248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bool file_base_scheme = base_parsed.scheme.is_nonempty() &&
249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        CompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);
250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return url_canon::ResolveRelativeURL(base_spec, base_parsed,
251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                         file_base_scheme, relative,
252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                         relative_component, charset_converter,
253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                         output, output_parsed);
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Not relative, canonicalize the input.
257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoCanonicalize(relative, relative_length, charset_converter,
258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        output, output_parsed);
259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoReplaceComponents(const char* spec,
263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         int spec_len,
264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         const url_parse::Parsed& parsed,
265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         const url_canon::Replacements<CHAR>& replacements,
266c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         url_canon::CharsetConverter* charset_converter,
267c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         url_canon::CanonOutput* output,
268c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         url_parse::Parsed* out_parsed) {
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // If the scheme is overridden, just do a simple string substitution and
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // reparse the whole thing. There are lots of edge cases that we really don't
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // want to deal with. Like what happens if I replace "http://e:8080/foo"
272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // with a file. Does it become "file:///E:/8080/foo" where the port number
273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // becomes part of the path? Parsing that string as a file URL says "yes"
274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // but almost no sane rule for dealing with the components individually would
275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // come up with that.
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Why allow these crazy cases at all? Programatically, there is almost no
278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // case for replacing the scheme. The most common case for hitting this is
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // in JS when building up a URL using the location object. In this case, the
280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // JS code expects the string substitution behavior:
281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //   http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (replacements.IsSchemeOverridden()) {
283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Canonicalize the new scheme so it is 8-bit and can be concatenated with
284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // the existing spec.
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    url_canon::RawCanonOutput<128> scheme_replaced;
286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    url_parse::Component scheme_replaced_parsed;
287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    url_canon::CanonicalizeScheme(
288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        replacements.sources().scheme,
289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        replacements.components().scheme,
290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        &scheme_replaced, &scheme_replaced_parsed);
291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // We can assume that the input is canonicalized, which means it always has
293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // a colon after the scheme (or where the scheme would be).
294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                                    : 1;
296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (spec_len - spec_after_colon > 0) {
297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      scheme_replaced.Append(&spec[spec_after_colon],
298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                             spec_len - spec_after_colon);
299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // We now need to completely re-parse the resulting string since its meaning
302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // may have changed with the different scheme.
303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    url_canon::RawCanonOutput<128> recanonicalized;
304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    url_parse::Parsed recanonicalized_parsed;
305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(),
306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                   charset_converter,
307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                   &recanonicalized, &recanonicalized_parsed);
308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Recurse using the version with the scheme already replaced. This will now
310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // use the replacement rules for the new scheme.
311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    //
312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Warning: this code assumes that ReplaceComponents will re-check all
313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // components for validity. This is because we can't fail if DoCanonicalize
314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // failed above since theoretically the thing making it fail could be
315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // getting replaced here. If ReplaceComponents didn't re-check everything,
316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // we wouldn't know if something *not* getting replaced is a problem.
317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // If the scheme-specific replacers are made more intelligent so they don't
318c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // re-check everything, we should instead recanonicalize the whole thing
319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // after this call to check validity (this assumes replacing the scheme is
320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // much much less common than other types of replacements, like clearing the
321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // ref).
322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    url_canon::Replacements<CHAR> replacements_no_scheme = replacements;
323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    replacements_no_scheme.SetScheme(NULL, url_parse::Component());
324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
325c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                               recanonicalized_parsed, replacements_no_scheme,
326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                               charset_converter, output, out_parsed);
327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
328c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // If we get here, then we know the scheme doesn't need to be replaced, so can
330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // just key off the scheme in the spec to know how to do the replacements.
331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) {
332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return url_canon::ReplaceFileURL(spec, parsed, replacements,
333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                     charset_converter, output, out_parsed);
334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (DoIsStandard(spec, parsed.scheme)) {
336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return url_canon::ReplaceStandardURL(spec, parsed, replacements,
337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                         charset_converter, output, out_parsed);
338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) {
340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott     return url_canon::ReplaceMailtoURL(spec, parsed, replacements,
341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                        output, out_parsed);
342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
344c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Default is a path URL.
345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return url_canon::ReplacePathURL(spec, parsed, replacements,
346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                   output, out_parsed);
347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace
350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
351c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Initialize() {
352c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  InitStandardSchemes();
353c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
354c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
355c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Shutdown() {
356c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (standard_schemes) {
357c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    delete standard_schemes;
358c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    standard_schemes = NULL;
359c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
360c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
361c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid AddStandardScheme(const char* new_scheme) {
363c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // If this assert triggers, it means you've called AddStandardScheme after
364c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // LockStandardSchemes have been called (see the header file for
365c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // LockStandardSchemes for more).
366c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
367c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // This normally means you're trying to set up a new standard scheme too late
368c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // in your application's init process. Locate where your app does this
369c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // initialization and calls LockStandardScheme, and add your new standard
370c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // scheme there.
371c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(!standard_schemes_locked) <<
372c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "Trying to add a standard scheme after the list has been locked.";
373c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t scheme_len = strlen(new_scheme);
375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (scheme_len == 0)
376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Dulicate the scheme into a new buffer and add it to the list of standard
379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // schemes. This pointer will be leaked on shutdown.
380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  char* dup_scheme = new char[scheme_len + 1];
381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  memcpy(dup_scheme, new_scheme, scheme_len + 1);
382c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
383c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  InitStandardSchemes();
384c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  standard_schemes->push_back(dup_scheme);
385c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
386c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
387c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid LockStandardSchemes() {
388c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  standard_schemes_locked = true;
389c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
390c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
391c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsStandard(const char* spec, const url_parse::Component& scheme) {
392c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return DoIsStandard(spec, scheme);
393c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
394c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
395c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsStandard(const char16* spec, const url_parse::Component& scheme) {
396c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return DoIsStandard(spec, scheme);
397c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
398c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
399c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool FindAndCompareScheme(const char* str,
400c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          int str_len,
401c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* compare,
402c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          url_parse::Component* found_scheme) {
403c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
404c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
405c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
406c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool FindAndCompareScheme(const char16* str,
407c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          int str_len,
408c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* compare,
409c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          url_parse::Component* found_scheme) {
410c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
411c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
412c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
413c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool Canonicalize(const char* spec,
414c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  int spec_len,
415c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  url_canon::CharsetConverter* charset_converter,
416c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  url_canon::CanonOutput* output,
417c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  url_parse::Parsed* output_parsed) {
418c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoCanonicalize(spec, spec_len, charset_converter,
419c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        output, output_parsed);
420c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
421c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
422c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool Canonicalize(const char16* spec,
423c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  int spec_len,
424c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  url_canon::CharsetConverter* charset_converter,
425c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  url_canon::CanonOutput* output,
426c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  url_parse::Parsed* output_parsed) {
427c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoCanonicalize(spec, spec_len, charset_converter,
428c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        output, output_parsed);
429c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
430c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
431c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ResolveRelative(const char* base_spec,
432c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     int base_spec_len,
433c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     const url_parse::Parsed& base_parsed,
434c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     const char* relative,
435c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     int relative_length,
436c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     url_canon::CharsetConverter* charset_converter,
437c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     url_canon::CanonOutput* output,
438c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     url_parse::Parsed* output_parsed) {
439c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoResolveRelative(base_spec, base_spec_len, base_parsed,
440c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           relative, relative_length,
441c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           charset_converter, output, output_parsed);
442c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
443c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
444c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ResolveRelative(const char* base_spec,
445c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     int base_spec_len,
446c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     const url_parse::Parsed& base_parsed,
447c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     const char16* relative,
448c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     int relative_length,
449c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     url_canon::CharsetConverter* charset_converter,
450c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     url_canon::CanonOutput* output,
451c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     url_parse::Parsed* output_parsed) {
452c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoResolveRelative(base_spec, base_spec_len, base_parsed,
453c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           relative, relative_length,
454c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           charset_converter, output, output_parsed);
455c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
456c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
457c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplaceComponents(const char* spec,
458c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       int spec_len,
459c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const url_parse::Parsed& parsed,
460c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const url_canon::Replacements<char>& replacements,
461c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_canon::CharsetConverter* charset_converter,
462c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_canon::CanonOutput* output,
463c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_parse::Parsed* out_parsed) {
464c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoReplaceComponents(spec, spec_len, parsed, replacements,
465c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                             charset_converter, output, out_parsed);
466c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
467c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
468c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplaceComponents(const char* spec,
469c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       int spec_len,
470c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const url_parse::Parsed& parsed,
471c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const url_canon::Replacements<char16>& replacements,
472c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_canon::CharsetConverter* charset_converter,
473c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_canon::CanonOutput* output,
474c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       url_parse::Parsed* out_parsed) {
475c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoReplaceComponents(spec, spec_len, parsed, replacements,
476c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                             charset_converter, output, out_parsed);
477c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
478c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
479c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Front-ends for LowerCaseEqualsASCII.
480c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool LowerCaseEqualsASCII(const char* a_begin,
481c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* a_end,
482c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* b) {
483c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
484c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
485c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
486c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool LowerCaseEqualsASCII(const char* a_begin,
487c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* a_end,
488c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* b_begin,
489c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* b_end) {
490c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  while (a_begin != a_end && b_begin != b_end &&
491c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott         ToLowerASCII(*a_begin) == *b_begin) {
492c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    a_begin++;
493c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    b_begin++;
494c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
495c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return a_begin == a_end && b_begin == b_end;
496c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
497c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
498c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool LowerCaseEqualsASCII(const char16* a_begin,
499c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char16* a_end,
500c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const char* b) {
501c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
502c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
503c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
504c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid DecodeURLEscapeSequences(const char* input, int length,
505c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                              url_canon::CanonOutputW* output) {
506c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  url_canon::RawCanonOutputT<char> unescaped_chars;
507c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (int i = 0; i < length; i++) {
508c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (input[i] == '%') {
509c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      unsigned char ch;
510c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (url_canon::DecodeEscaped(input, &i, length, &ch)) {
511c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        unescaped_chars.push_back(ch);
512c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      } else {
513c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        // Invalid escape sequence, copy the percent literal.
514c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        unescaped_chars.push_back('%');
515c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      }
516c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    } else {
517c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // Regular non-escaped 8-bit character.
518c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      unescaped_chars.push_back(input[i]);
519c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
520c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
521c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
522c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Convert that 8-bit to UTF-16. It's not clear IE does this at all to
523c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // JavaScript URLs, but Firefox and Safari do.
524c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (int i = 0; i < unescaped_chars.length(); i++) {
525c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
526c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (uch < 0x80) {
527c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // Non-UTF-8, just append directly
528c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      output->push_back(uch);
529c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    } else {
530c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // next_ch will point to the last character of the decoded
531c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // character.
532c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      int next_character = i;
533c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      unsigned code_point;
534c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (url_canon::ReadUTFChar(unescaped_chars.data(), &next_character,
535c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                 unescaped_chars.length(), &code_point)) {
536c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        // Valid UTF-8 character, convert to UTF-16.
537c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        url_canon::AppendUTF16Value(code_point, output);
538c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        i = next_character;
539c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      } else {
540c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        // If there are any sequences that are not valid UTF-8, we keep
541c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        // invalid code points and promote to UTF-16. We copy all characters
542c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        // from the current position to the end of the identified sequence.
543c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        while (i < next_character) {
544c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
545c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          i++;
546c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        }
547c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
548c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      }
549c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
550c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
551c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
552c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
553c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace url_util
554