// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/logging.h"
#include "url/url_file.h"
#include "url/url_parse.h"
#include "url/url_parse_internal.h"

// Interesting IE file:isms...
//
//  INPUT                      OUTPUT
//  =========================  ==============================
//  file:/foo/bar              file:///foo/bar
//      The result here seems totally invalid!?!? This isn't UNC.
//
//  file:/
//  file:// or any other number of slashes
//      IE6 doesn't do anything at all if you click on this link. No error:
//      nothing. IE6's history system seems to always color this link, so I'm
//      guessing that it maps internally to the empty URL.
//
//  C:\                        file:///C:/
//      When on a file: URL source page, this link will work. When over HTTP,
//      the file: URL will appear in the status bar but the link will not work
//      (security restriction for all file URLs).
//
//  file:foo/                  file:foo/     (invalid?!?!?)
//  file:/foo/                 file:///foo/  (invalid?!?!?)
//  file://foo/                file://foo/   (UNC to server "foo")
//  file:///foo/               file:///foo/  (invalid, seems to be a file)
//  file:////foo/              file://foo/   (UNC to server "foo")
//      Any more than four slashes is also treated as UNC.
//
//  file:C:/                   file://C:/
//  file:/C:/                  file://C:/
//      The number of slashes after "file:" doesn't matter if the thing
//      following it looks like an absolute drive path. Also, slashes and
//      backslashes are equally valid here.

namespace url_parse {

namespace {

// A subcomponent of DoParseFileURL, the input of this function should be a UNC
// path name, with the index of the first character after the slashes following
// the scheme given in |after_slashes|. This will initialize the host, path,
// query, and ref, and leave the other output components untouched
// (DoParseFileURL handles these for us).
50template<typename CHAR> 51void DoParseUNC(const CHAR* spec, 52 int after_slashes, 53 int spec_len, 54 Parsed* parsed) { 55 int next_slash = FindNextSlash(spec, after_slashes, spec_len); 56 if (next_slash == spec_len) { 57 // No additional slash found, as in "file://foo", treat the text as the 58 // host with no path (this will end up being UNC to server "foo"). 59 int host_len = spec_len - after_slashes; 60 if (host_len) 61 parsed->host = Component(after_slashes, host_len); 62 else 63 parsed->host.reset(); 64 parsed->path.reset(); 65 return; 66 } 67 68#ifdef WIN32 69 // See if we have something that looks like a path following the first 70 // component. As in "file://localhost/c:/", we get "c:/" out. We want to 71 // treat this as a having no host but the path given. Works on Windows only. 72 if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) { 73 parsed->host.reset(); 74 ParsePathInternal(spec, MakeRange(next_slash, spec_len), 75 &parsed->path, &parsed->query, &parsed->ref); 76 return; 77 } 78#endif 79 80 // Otherwise, everything up until that first slash we found is the host name, 81 // which will end up being the UNC host. For example "file://foo/bar.txt" 82 // will get a server name of "foo" and a path of "/bar". Later, on Windows, 83 // this should be treated as the filename "\\foo\bar.txt" in proper UNC 84 // notation. 85 int host_len = next_slash - after_slashes; 86 if (host_len) 87 parsed->host = MakeRange(after_slashes, next_slash); 88 else 89 parsed->host.reset(); 90 if (next_slash < spec_len) { 91 ParsePathInternal(spec, MakeRange(next_slash, spec_len), 92 &parsed->path, &parsed->query, &parsed->ref); 93 } else { 94 parsed->path.reset(); 95 } 96} 97 98// A subcomponent of DoParseFileURL, the input should be a local file, with the 99// beginning of the path indicated by the index in |path_begin|. 
This will 100// initialize the host, path, query, and ref, and leave the other output 101// components untouched (DoInitFileURL handles these for us). 102template<typename CHAR> 103void DoParseLocalFile(const CHAR* spec, 104 int path_begin, 105 int spec_len, 106 Parsed* parsed) { 107 parsed->host.reset(); 108 ParsePathInternal(spec, MakeRange(path_begin, spec_len), 109 &parsed->path, &parsed->query, &parsed->ref); 110} 111 112// Backend for the external functions that operates on either char type. 113// We are handed the character after the "file:" at the beginning of the spec. 114// Usually this is a slash, but needn't be; we allow paths like "file:c:\foo". 115template<typename CHAR> 116void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) { 117 DCHECK(spec_len >= 0); 118 119 // Get the parts we never use for file URLs out of the way. 120 parsed->username.reset(); 121 parsed->password.reset(); 122 parsed->port.reset(); 123 124 // Many of the code paths don't set these, so it's convenient to just clear 125 // them. We'll write them in those cases we need them. 126 parsed->query.reset(); 127 parsed->ref.reset(); 128 129 // Strip leading & trailing spaces and control characters. 130 int begin = 0; 131 TrimURL(spec, &begin, &spec_len); 132 133 // Find the scheme. 134 int num_slashes; 135 int after_scheme; 136 int after_slashes; 137#ifdef WIN32 138 // See how many slashes there are. We want to handle cases like UNC but also 139 // "/c:/foo". This is when there is no scheme, so we can allow pages to do 140 // links like "c:/foo/bar" or "//foo/bar". This is also called by the 141 // relative URL resolver when it determines there is an absolute URL, which 142 // may give us input like "/c:/foo". 143 num_slashes = CountConsecutiveSlashes(spec, begin, spec_len); 144 after_slashes = begin + num_slashes; 145 if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) { 146 // Windows path, don't try to extract the scheme (for example, "c:\foo"). 
147 parsed->scheme.reset(); 148 after_scheme = after_slashes; 149 } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) { 150 // Windows UNC path: don't try to extract the scheme, but keep the slashes. 151 parsed->scheme.reset(); 152 after_scheme = begin; 153 } else 154#endif 155 { 156 if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 157 // Offset the results since we gave ExtractScheme a substring. 158 parsed->scheme.begin += begin; 159 after_scheme = parsed->scheme.end() + 1; 160 } else { 161 // No scheme found, remember that. 162 parsed->scheme.reset(); 163 after_scheme = begin; 164 } 165 } 166 167 // Handle empty specs ones that contain only whitespace or control chars, 168 // or that are just the scheme (for example "file:"). 169 if (after_scheme == spec_len) { 170 parsed->host.reset(); 171 parsed->path.reset(); 172 return; 173 } 174 175 num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); 176 177 after_slashes = after_scheme + num_slashes; 178#ifdef WIN32 179 // Check whether the input is a drive again. We checked above for windows 180 // drive specs, but that's only at the very beginning to see if we have a 181 // scheme at all. This test will be duplicated in that case, but will 182 // additionally handle all cases with a real scheme such as "file:///C:/". 183 if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) && 184 num_slashes != 3) { 185 // Anything not beginning with a drive spec ("c:\") on Windows is treated 186 // as UNC, with the exception of three slashes which always means a file. 187 // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails. 188 DoParseUNC(spec, after_slashes, spec_len, parsed); 189 return; 190 } 191#else 192 // file: URL with exactly 2 slashes is considered to have a host component. 
193 if (num_slashes == 2) { 194 DoParseUNC(spec, after_slashes, spec_len, parsed); 195 return; 196 } 197#endif // WIN32 198 199 // Easy and common case, the full path immediately follows the scheme 200 // (modulo slashes), as in "file://c:/foo". Just treat everything from 201 // there to the end as the path. Empty hosts have 0 length instead of -1. 202 // We include the last slash as part of the path if there is one. 203 DoParseLocalFile(spec, 204 num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme, 205 spec_len, parsed); 206} 207 208} // namespace 209 210void ParseFileURL(const char* url, int url_len, Parsed* parsed) { 211 DoParseFileURL(url, url_len, parsed); 212} 213 214void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) { 215 DoParseFileURL(url, url_len, parsed); 216} 217 218} // namespace url_parse 219