1// Copyright 2007, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Functions for canonicalizing "file:" URLs.
31
32#include "googleurl/src/url_canon.h"
33#include "googleurl/src/url_canon_internal.h"
34#include "googleurl/src/url_file.h"
35#include "googleurl/src/url_parse_internal.h"
36
37namespace url_canon {
38
39namespace {
40
41#ifdef WIN32
42
43// Given a pointer into the spec, this copies and canonicalizes the drive
44// letter and colon to the output, if one is found. If there is not a drive
45// spec, it won't do anything. The index of the next character in the input
46// spec is returned (after the colon when a drive spec is found, the begin
47// offset if one is not).
48template<typename CHAR>
49int FileDoDriveSpec(const CHAR* spec, int begin, int end,
50                    CanonOutput* output) {
51  // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
52  // (with backslashes instead of slashes as well).
53  int num_slashes = url_parse::CountConsecutiveSlashes(spec, begin, end);
54  int after_slashes = begin + num_slashes;
55
56  if (!url_parse::DoesBeginWindowsDriveSpec(spec, after_slashes, end))
57    return begin;  // Haven't consumed any characters
58
59  // A drive spec is the start of a path, so we need to add a slash for the
60  // authority terminator (typically the third slash).
61  output->push_back('/');
62
63  // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
64  // and that it is followed by a colon/pipe.
65
66  // Normalize Windows drive letters to uppercase
67  if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
68    output->push_back(spec[after_slashes] - 'a' + 'A');
69  else
70    output->push_back(static_cast<char>(spec[after_slashes]));
71
72  // Normalize the character following it to a colon rather than pipe.
73  output->push_back(':');
74  return after_slashes + 2;
75}
76
77#endif  // WIN32
78
79template<typename CHAR, typename UCHAR>
80bool DoFileCanonicalizePath(const CHAR* spec,
81                            const url_parse::Component& path,
82                            CanonOutput* output,
83                            url_parse::Component* out_path) {
84  // Copies and normalizes the "c:" at the beginning, if present.
85  out_path->begin = output->length();
86  int after_drive;
87#ifdef WIN32
88  after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
89#else
90  after_drive = path.begin;
91#endif
92
93  // Copies the rest of the path, starting from the slash following the
94  // drive colon (if any, Windows only), or the first slash of the path.
95  bool success = true;
96  if (after_drive < path.end()) {
97    // Use the regular path canonicalizer to canonicalize the rest of the
98    // path. Give it a fake output component to write into. DoCanonicalizeFile
99    // will compute the full path component.
100    url_parse::Component sub_path =
101        url_parse::MakeRange(after_drive, path.end());
102    url_parse::Component fake_output_path;
103    success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
104  } else {
105    // No input path, canonicalize to a slash.
106    output->push_back('/');
107  }
108
109  out_path->len = output->length() - out_path->begin;
110  return success;
111}
112
113template<typename CHAR, typename UCHAR>
114bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
115                           const url_parse::Parsed& parsed,
116                           CharsetConverter* query_converter,
117                           CanonOutput* output,
118                           url_parse::Parsed* new_parsed) {
119  // Things we don't set in file: URLs.
120  new_parsed->username = url_parse::Component();
121  new_parsed->password = url_parse::Component();
122  new_parsed->port = url_parse::Component();
123
124  // Scheme (known, so we don't bother running it through the more
125  // complicated scheme canonicalizer).
126  new_parsed->scheme.begin = output->length();
127  output->Append("file://", 7);
128  new_parsed->scheme.len = 4;
129
130  // Append the host. For many file URLs, this will be empty. For UNC, this
131  // will be present.
132  // TODO(brettw) This doesn't do any checking for host name validity. We
133  // should probably handle validity checking of UNC hosts differently than
134  // for regular IP hosts.
135  bool success = CanonicalizeHost(source.host, parsed.host,
136                                  output, &new_parsed->host);
137  success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
138                                    output, &new_parsed->path);
139  CanonicalizeQuery(source.query, parsed.query, query_converter,
140                    output, &new_parsed->query);
141
142  // Ignore failure for refs since the URL can probably still be loaded.
143  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
144
145  return success;
146}
147
148} // namespace
149
150bool CanonicalizeFileURL(const char* spec,
151                         int spec_len,
152                         const url_parse::Parsed& parsed,
153                         CharsetConverter* query_converter,
154                         CanonOutput* output,
155                         url_parse::Parsed* new_parsed) {
156  return DoCanonicalizeFileURL<char, unsigned char>(
157      URLComponentSource<char>(spec), parsed, query_converter,
158      output, new_parsed);
159}
160
161bool CanonicalizeFileURL(const char16* spec,
162                         int spec_len,
163                         const url_parse::Parsed& parsed,
164                         CharsetConverter* query_converter,
165                         CanonOutput* output,
166                         url_parse::Parsed* new_parsed) {
167  return DoCanonicalizeFileURL<char16, char16>(
168      URLComponentSource<char16>(spec), parsed, query_converter,
169      output, new_parsed);
170}
171
172bool FileCanonicalizePath(const char* spec,
173                          const url_parse::Component& path,
174                          CanonOutput* output,
175                          url_parse::Component* out_path) {
176  return DoFileCanonicalizePath<char, unsigned char>(spec, path,
177                                                     output, out_path);
178}
179
180bool FileCanonicalizePath(const char16* spec,
181                          const url_parse::Component& path,
182                          CanonOutput* output,
183                          url_parse::Component* out_path) {
184  return DoFileCanonicalizePath<char16, char16>(spec, path,
185                                                output, out_path);
186}
187
188bool ReplaceFileURL(const char* base,
189                    const url_parse::Parsed& base_parsed,
190                    const Replacements<char>& replacements,
191                    CharsetConverter* query_converter,
192                    CanonOutput* output,
193                    url_parse::Parsed* new_parsed) {
194  URLComponentSource<char> source(base);
195  url_parse::Parsed parsed(base_parsed);
196  SetupOverrideComponents(base, replacements, &source, &parsed);
197  return DoCanonicalizeFileURL<char, unsigned char>(
198      source, parsed, query_converter, output, new_parsed);
199}
200
201bool ReplaceFileURL(const char* base,
202                    const url_parse::Parsed& base_parsed,
203                    const Replacements<char16>& replacements,
204                    CharsetConverter* query_converter,
205                    CanonOutput* output,
206                    url_parse::Parsed* new_parsed) {
207  RawCanonOutput<1024> utf8;
208  URLComponentSource<char> source(base);
209  url_parse::Parsed parsed(base_parsed);
210  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
211  return DoCanonicalizeFileURL<char, unsigned char>(
212      source, parsed, query_converter, output, new_parsed);
213}
214
215}  // namespace url_canon
216