1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Copyright 2007, Google Inc.
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// All rights reserved.
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Redistribution and use in source and binary forms, with or without
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// modification, are permitted provided that the following conditions are
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// met:
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     * Redistributions of source code must retain the above copyright
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// notice, this list of conditions and the following disclaimer.
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     * Redistributions in binary form must reproduce the above
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// copyright notice, this list of conditions and the following disclaimer
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// in the documentation and/or other materials provided with the
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// distribution.
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     * Neither the name of Google Inc. nor the names of its
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// contributors may be used to endorse or promote products derived from
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// this software without specific prior written permission.
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Functions for canonicalizing "path" URLs. Not to be confused with the path
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// of a URL, these are URLs that have no authority section, only a path. For
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// example, "javascript:" and "data:".
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_canon.h"
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_canon_internal.h"
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace url_canon {
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace {
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR, typename UCHAR>
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           const url_parse::Parsed& parsed,
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           CanonOutput* output,
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           url_parse::Parsed* new_parsed) {
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Scheme: this will append the colon.
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                    output, &new_parsed->scheme);
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // We assume there's no authority for path URLs. Note that hosts should never
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // have -1 length.
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  new_parsed->username.reset();
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  new_parsed->password.reset();
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  new_parsed->host.reset();
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  new_parsed->port.reset();
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (parsed.path.is_valid()) {
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Copy the path using path URL's more lax escaping rules (think for
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // ASCII characters alone. This helps readability of JavaStript.
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    new_parsed->path.begin = output->length();
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int end = parsed.path.end();
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for (int i = parsed.path.begin; i < end; i++) {
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      UCHAR uch = static_cast<UCHAR>(source.path[i]);
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      if (uch < 0x20 || uch >= 0x80)
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        success &= AppendUTF8EscapedChar(source.path, &i, end, output);
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      else
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        output->push_back(static_cast<char>(uch));
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    new_parsed->path.len = output->length() - new_parsed->path.begin;
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Empty path.
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    new_parsed->path.reset();
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Assume there's no query or ref.
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  new_parsed->query.reset();
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  new_parsed->ref.reset();
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return success;
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool CanonicalizePathURL(const char* spec,
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         int spec_len,
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         const url_parse::Parsed& parsed,
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         CanonOutput* output,
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         url_parse::Parsed* new_parsed) {
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoCanonicalizePathURL<char, unsigned char>(
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      URLComponentSource<char>(spec), parsed, output, new_parsed);
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool CanonicalizePathURL(const char16* spec,
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         int spec_len,
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         const url_parse::Parsed& parsed,
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         CanonOutput* output,
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         url_parse::Parsed* new_parsed) {
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoCanonicalizePathURL<char16, char16>(
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      URLComponentSource<char16>(spec), parsed, output, new_parsed);
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplacePathURL(const char* base,
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    const url_parse::Parsed& base_parsed,
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    const Replacements<char>& replacements,
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    CanonOutput* output,
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    url_parse::Parsed* new_parsed) {
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  URLComponentSource<char> source(base);
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_parse::Parsed parsed(base_parsed);
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  SetupOverrideComponents(base, replacements, &source, &parsed);
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoCanonicalizePathURL<char, unsigned char>(
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      source, parsed, output, new_parsed);
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplacePathURL(const char* base,
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    const url_parse::Parsed& base_parsed,
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    const Replacements<char16>& replacements,
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    CanonOutput* output,
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    url_parse::Parsed* new_parsed) {
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  RawCanonOutput<1024> utf8;
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  URLComponentSource<char> source(base);
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  url_parse::Parsed parsed(base_parsed);
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoCanonicalizePathURL<char, unsigned char>(
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      source, parsed, output, new_parsed);
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace url_canon
129