1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// found in the LICENSE file. 4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Functions for canonicalizing "file:" URLs. 6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_canon.h" 8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_canon_internal.h" 9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_file.h" 10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_parse_internal.h" 11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 120529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochnamespace url { 13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace { 15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#ifdef WIN32 17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Given a pointer into the spec, this copies and canonicalizes the drive 19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// letter and colon to the output, if one is found. If there is not a drive 20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// spec, it won't do anything. The index of the next character in the input 21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// spec is returned (after the colon when a drive spec is found, the begin 22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// offset if one is not). 23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int FileDoDriveSpec(const CHAR* spec, int begin, int end, 25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output) { 26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo, 27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // (with backslashes instead of slashes as well). 280529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch int num_slashes = CountConsecutiveSlashes(spec, begin, end); 29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int after_slashes = begin + num_slashes; 30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 310529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end)) 32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return begin; // Haven't consumed any characters 33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // A drive spec is the start of a path, so we need to add a slash for the 35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // authority terminator (typically the third slash). 36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back('/'); 37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid 39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // and that it is followed by a colon/pipe. 40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Normalize Windows drive letters to uppercase 42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z') 43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back(spec[after_slashes] - 'a' + 'A'); 44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else 45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back(static_cast<char>(spec[after_slashes])); 46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Normalize the character following it to a colon rather than pipe. 48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back(':'); 49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return after_slashes + 2; 50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 51c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#endif // WIN32 53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR, typename UCHAR> 55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoFileCanonicalizePath(const CHAR* spec, 560529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& path, 57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 580529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Component* out_path) { 59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Copies and normalizes the "c:" at the beginning, if present. 60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) out_path->begin = output->length(); 61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int after_drive; 62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#ifdef WIN32 63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output); 64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#else 65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) after_drive = path.begin; 66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#endif 67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Copies the rest of the path, starting from the slash following the 69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // drive colon (if any, Windows only), or the first slash of the path. 70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool success = true; 71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (after_drive < path.end()) { 72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Use the regular path canonicalizer to canonicalize the rest of the 73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // path. Give it a fake output component to write into. DoCanonicalizeFile 74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // will compute the full path component. 750529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Component sub_path = MakeRange(after_drive, path.end()); 760529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Component fake_output_path; 77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) success = CanonicalizePath(spec, sub_path, output, &fake_output_path); 78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No input path, canonicalize to a slash. 80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back('/'); 81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) out_path->len = output->length() - out_path->begin; 84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return success; 85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR, typename UCHAR> 88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source, 890529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& parsed, 90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CharsetConverter* query_converter, 91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 920529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Things we don't set in file: URLs. 940529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->username = Component(); 950529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->password = Component(); 960529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->port = Component(); 97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 98c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Scheme (known, so we don't bother running it through the more 99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // complicated scheme canonicalizer). 100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_parsed->scheme.begin = output->length(); 101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->Append("file://", 7); 102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_parsed->scheme.len = 4; 103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Append the host. For many file URLs, this will be empty. For UNC, this 105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // will be present. 106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // TODO(brettw) This doesn't do any checking for host name validity. We 107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // should probably handle validity checking of UNC hosts differently than 108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // for regular IP hosts. 109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool success = CanonicalizeHost(source.host, parsed.host, 110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, &new_parsed->host); 111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path, 112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, &new_parsed->path); 113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonicalizeQuery(source.query, parsed.query, query_converter, 114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, &new_parsed->query); 115c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 116c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Ignore failure for refs since the URL can probably still be loaded. 117c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); 118c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 119c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return success; 120c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 121c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 122c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} // namespace 123c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 124c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool CanonicalizeFileURL(const char* spec, 125c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int spec_len, 1260529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& parsed, 127c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CharsetConverter* query_converter, 128c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 1290529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 130c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoCanonicalizeFileURL<char, unsigned char>( 131c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) URLComponentSource<char>(spec), parsed, query_converter, 132c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, new_parsed); 133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 134c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 1357d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool CanonicalizeFileURL(const base::char16* spec, 136c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int spec_len, 1370529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& parsed, 138c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CharsetConverter* query_converter, 139c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 1400529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 1417d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) return DoCanonicalizeFileURL<base::char16, base::char16>( 1427d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) URLComponentSource<base::char16>(spec), parsed, query_converter, 143c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, new_parsed); 144c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 145c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 146c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool FileCanonicalizePath(const char* spec, 1470529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& path, 148c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 1490529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Component* out_path) { 150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoFileCanonicalizePath<char, unsigned char>(spec, path, 151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, out_path); 152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 153c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 1547d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool FileCanonicalizePath(const base::char16* spec, 1550529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Component& path, 156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 1570529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Component* out_path) { 1587d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) return DoFileCanonicalizePath<base::char16, base::char16>(spec, path, 1597d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) output, out_path); 160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ReplaceFileURL(const char* base, 1630529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& base_parsed, 164c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Replacements<char>& replacements, 165c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CharsetConverter* query_converter, 166c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 1670529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 168c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) URLComponentSource<char> source(base); 1690529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed parsed(base_parsed); 170c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) SetupOverrideComponents(base, replacements, &source, &parsed); 171c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoCanonicalizeFileURL<char, unsigned char>( 172c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) source, parsed, query_converter, output, new_parsed); 173c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 174c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 175c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ReplaceFileURL(const char* base, 1760529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& base_parsed, 1777d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const Replacements<base::char16>& replacements, 178c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CharsetConverter* query_converter, 179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 1800529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 181c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) RawCanonOutput<1024> utf8; 182c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) URLComponentSource<char> source(base); 1830529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed parsed(base_parsed); 184c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 185c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoCanonicalizeFileURL<char, unsigned char>( 186c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) source, parsed, query_converter, output, new_parsed); 187c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 188c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 1890529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch} // namespace url 190