1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/logging.h"
6#include "url/url_file.h"
7#include "url/url_parse.h"
8#include "url/url_parse_internal.h"
9
10// Interesting IE file:isms...
11//
12//  INPUT                      OUTPUT
13//  =========================  ==============================
14//  file:/foo/bar              file:///foo/bar
15//      The result here seems totally invalid!?!? This isn't UNC.
16//
17//  file:/
18//  file:// or any other number of slashes
19//      IE6 doesn't do anything at all if you click on this link. No error:
20//      nothing. IE6's history system seems to always color this link, so I'm
21//      guessing that it maps internally to the empty URL.
22//
23//  C:\                        file:///C:/
24//      When on a file: URL source page, this link will work. When over HTTP,
25//      the file: URL will appear in the status bar but the link will not work
26//      (security restriction for all file URLs).
27//
28//  file:foo/                  file:foo/     (invalid?!?!?)
29//  file:/foo/                 file:///foo/  (invalid?!?!?)
30//  file://foo/                file://foo/   (UNC to server "foo")
31//  file:///foo/               file:///foo/  (invalid, seems to be a file)
32//  file:////foo/              file://foo/   (UNC to server "foo")
33//      Any more than four slashes is also treated as UNC.
34//
35//  file:C:/                   file://C:/
36//  file:/C:/                  file://C:/
//      The number of slashes after "file:" doesn't matter if the thing
//      following it looks like an absolute drive path. Also, slashes and
//      backslashes are equally valid here.
40
41namespace url_parse {
42
43namespace {
44
45// A subcomponent of DoInitFileURL, the input of this function should be a UNC
46// path name, with the index of the first character after the slashes following
47// the scheme given in |after_slashes|. This will initialize the host, path,
48// query, and ref, and leave the other output components untouched
49// (DoInitFileURL handles these for us).
50template<typename CHAR>
51void DoParseUNC(const CHAR* spec,
52                int after_slashes,
53                int spec_len,
54               Parsed* parsed) {
55  int next_slash = FindNextSlash(spec, after_slashes, spec_len);
56  if (next_slash == spec_len) {
57    // No additional slash found, as in "file://foo", treat the text as the
58    // host with no path (this will end up being UNC to server "foo").
59    int host_len = spec_len - after_slashes;
60    if (host_len)
61      parsed->host = Component(after_slashes, host_len);
62    else
63      parsed->host.reset();
64    parsed->path.reset();
65    return;
66  }
67
68#ifdef WIN32
69  // See if we have something that looks like a path following the first
70  // component. As in "file://localhost/c:/", we get "c:/" out. We want to
71  // treat this as a having no host but the path given. Works on Windows only.
72  if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) {
73    parsed->host.reset();
74    ParsePathInternal(spec, MakeRange(next_slash, spec_len),
75                      &parsed->path, &parsed->query, &parsed->ref);
76    return;
77  }
78#endif
79
80  // Otherwise, everything up until that first slash we found is the host name,
81  // which will end up being the UNC host. For example "file://foo/bar.txt"
82  // will get a server name of "foo" and a path of "/bar". Later, on Windows,
83  // this should be treated as the filename "\\foo\bar.txt" in proper UNC
84  // notation.
85  int host_len = next_slash - after_slashes;
86  if (host_len)
87    parsed->host = MakeRange(after_slashes, next_slash);
88  else
89    parsed->host.reset();
90  if (next_slash < spec_len) {
91    ParsePathInternal(spec, MakeRange(next_slash, spec_len),
92                      &parsed->path, &parsed->query, &parsed->ref);
93  } else {
94    parsed->path.reset();
95  }
96}
97
98// A subcomponent of DoParseFileURL, the input should be a local file, with the
99// beginning of the path indicated by the index in |path_begin|. This will
100// initialize the host, path, query, and ref, and leave the other output
101// components untouched (DoInitFileURL handles these for us).
102template<typename CHAR>
103void DoParseLocalFile(const CHAR* spec,
104                      int path_begin,
105                      int spec_len,
106                      Parsed* parsed) {
107  parsed->host.reset();
108  ParsePathInternal(spec, MakeRange(path_begin, spec_len),
109                    &parsed->path, &parsed->query, &parsed->ref);
110}
111
112// Backend for the external functions that operates on either char type.
113// We are handed the character after the "file:" at the beginning of the spec.
114// Usually this is a slash, but needn't be; we allow paths like "file:c:\foo".
115template<typename CHAR>
116void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
117  DCHECK(spec_len >= 0);
118
119  // Get the parts we never use for file URLs out of the way.
120  parsed->username.reset();
121  parsed->password.reset();
122  parsed->port.reset();
123
124  // Many of the code paths don't set these, so it's convenient to just clear
125  // them. We'll write them in those cases we need them.
126  parsed->query.reset();
127  parsed->ref.reset();
128
129  // Strip leading & trailing spaces and control characters.
130  int begin = 0;
131  TrimURL(spec, &begin, &spec_len);
132
133  // Find the scheme.
134  int num_slashes;
135  int after_scheme;
136  int after_slashes;
137#ifdef WIN32
138  // See how many slashes there are. We want to handle cases like UNC but also
139  // "/c:/foo". This is when there is no scheme, so we can allow pages to do
140  // links like "c:/foo/bar" or "//foo/bar". This is also called by the
141  // relative URL resolver when it determines there is an absolute URL, which
142  // may give us input like "/c:/foo".
143  num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
144  after_slashes = begin + num_slashes;
145  if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
146    // Windows path, don't try to extract the scheme (for example, "c:\foo").
147    parsed->scheme.reset();
148    after_scheme = after_slashes;
149  } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) {
150    // Windows UNC path: don't try to extract the scheme, but keep the slashes.
151    parsed->scheme.reset();
152    after_scheme = begin;
153  } else
154#endif
155  {
156    if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
157      // Offset the results since we gave ExtractScheme a substring.
158      parsed->scheme.begin += begin;
159      after_scheme = parsed->scheme.end() + 1;
160    } else {
161      // No scheme found, remember that.
162      parsed->scheme.reset();
163      after_scheme = begin;
164    }
165  }
166
167  // Handle empty specs ones that contain only whitespace or control chars,
168  // or that are just the scheme (for example "file:").
169  if (after_scheme == spec_len) {
170    parsed->host.reset();
171    parsed->path.reset();
172    return;
173  }
174
175  num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
176
177  after_slashes = after_scheme + num_slashes;
178#ifdef WIN32
179  // Check whether the input is a drive again. We checked above for windows
180  // drive specs, but that's only at the very beginning to see if we have a
181  // scheme at all. This test will be duplicated in that case, but will
182  // additionally handle all cases with a real scheme such as "file:///C:/".
183  if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) &&
184      num_slashes != 3) {
185    // Anything not beginning with a drive spec ("c:\") on Windows is treated
186    // as UNC, with the exception of three slashes which always means a file.
187    // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
188    DoParseUNC(spec, after_slashes, spec_len, parsed);
189    return;
190  }
191#else
192  // file: URL with exactly 2 slashes is considered to have a host component.
193  if (num_slashes == 2) {
194    DoParseUNC(spec, after_slashes, spec_len, parsed);
195    return;
196  }
197#endif  // WIN32
198
199  // Easy and common case, the full path immediately follows the scheme
200  // (modulo slashes), as in "file://c:/foo". Just treat everything from
201  // there to the end as the path. Empty hosts have 0 length instead of -1.
202  // We include the last slash as part of the path if there is one.
203  DoParseLocalFile(spec,
204      num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme,
205      spec_len, parsed);
206}
207
208}  // namespace
209
210void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
211  DoParseFileURL(url, url_len, parsed);
212}
213
214void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) {
215  DoParseFileURL(url, url_len, parsed);
216}
217
218}  // namespace url_parse
219