// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <string.h> 31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <vector> 32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_util.h" 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/logging.h" 36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "googleurl/src/url_canon_internal.h" 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_file.h" 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace url_util { 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace { 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// ASCII-specific tolower. The standard library's tolower is locale sensitive, 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// so we don't want to use it here. 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate <class Char> inline Char ToLowerASCII(Char c) { 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Backend for LowerCaseEqualsASCII. 
// Returns true when the range [a_begin, a_end), after ASCII-lowercasing each
// element, is identical to the NUL-terminated string |b|.  |b| is compared
// as-is, so it must already be lower-case for a match to be possible.
template<typename Iter>
inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
  for (Iter it = a_begin; it != a_end; ++it, ++b) {
    // Reaching the end of |b| first means the input is longer: no match.
    if (!*b || ToLowerASCII(*it) != *b)
      return false;
  }
  // The input is exhausted; it matches only if |b| is exhausted too.
  return *b == 0;
}

const char kFileScheme[] = "file";  // Used in a number of places.
const char kMailtoScheme[] = "mailto";

// Schemes that are installed as "standard" by InitStandardSchemes.
const char* const kStandardURLSchemes[] = {
  "http",
  "https",
  kFileScheme,  // Yes, file urls can have a hostname!
  "ftp",
  "gopher",
  "ws",  // WebSocket.
  "wss",  // WebSocket secure.
};

// Derived from the table above so the count can never drift out of sync with
// the initializer list (a too-large hard-coded count would leave NULL entries).
const int kNumStandardURLSchemes = static_cast<int>(
    sizeof(kStandardURLSchemes) / sizeof(kStandardURLSchemes[0]));

// List of the currently installed standard schemes. This list is lazily
// initialized by InitStandardSchemes. It is held through a raw pointer rather
// than as a static object so that no destructor runs implicitly at exit;
// Shutdown() deletes it explicitly.
std::vector<const char*>* standard_schemes = NULL;

// See the LockStandardSchemes declaration in the header.
bool standard_schemes_locked = false;

// Ensures that the standard_schemes list is initialized, does nothing if it
// already has values.
void InitStandardSchemes() {
  if (standard_schemes)
    return;
  standard_schemes = new std::vector<const char*>;
  for (int i = 0; i < kNumStandardURLSchemes; i++)
    standard_schemes->push_back(kStandardURLSchemes[i]);
}

// Given a string and a range inside the string, compares it to the given
// lower-case |compare_to| buffer.
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottinline bool CompareSchemeComponent(const CHAR* spec, 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Component& component, 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* compare_to) { 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!component.is_nonempty()) 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return compare_to[0] == 0; // When component is empty, match empty scheme. 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return LowerCaseEqualsASCII(&spec[component.begin], 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott &spec[component.end()], 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott compare_to); 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Returns true if the given scheme identified by |scheme| within |spec| is one 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// of the registered "standard" schemes. 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) { 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!scheme.is_nonempty()) 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; // Empty or invalid schemes are non-standard. 
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott InitStandardSchemes(); 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (size_t i = 0; i < standard_schemes->size(); i++) { 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()], 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott standard_schemes->at(i))) 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return true; 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoFindAndCompareScheme(const CHAR* str, 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int str_len, 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* compare, 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Component* found_scheme) { 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Before extracting scheme, canonicalize the URL to remove any whitespace. 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This matches the canonicalization done in DoCanonicalize function. 
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::RawCanonOutputT<CHAR> whitespace_buffer; 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int spec_len; 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const CHAR* spec = RemoveURLWhitespace(str, str_len, 130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &whitespace_buffer, &spec_len); 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Component our_scheme; 133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!url_parse::ExtractScheme(spec, spec_len, &our_scheme)) { 134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No scheme. 135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (found_scheme) 136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *found_scheme = url_parse::Component(); 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (found_scheme) 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *found_scheme = our_scheme; 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return CompareSchemeComponent(spec, our_scheme, compare); 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoCanonicalize(const CHAR* in_spec, int in_spec_len, 146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* output_parsed) { 149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Remove any whitespace from the middle of the relative URL, 
possibly 150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // copying to the new buffer. 151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::RawCanonOutputT<CHAR> whitespace_buffer; 152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len; 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott &whitespace_buffer, &spec_len); 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed parsed_input; 157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifdef WIN32 158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // For Windows, we allow things that look like absolute Windows paths to be 159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // fixed up magically to file URLs. This is done for IE compatability. For 160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // example, this will change "c:/foo" into a file URL rather than treating 161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). 162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // There is similar logic in url_canon_relative.cc for 163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which 165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // has no meaning as an absolute path name. This is because browsers on Mac 166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // & Unix don't generally do this, so there is no compatibility reason for 167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // doing so. 
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) || 169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { 170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::ParseFileURL(spec, spec_len, &parsed_input); 171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, 172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, 173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif 176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Component scheme; 178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!url_parse::ExtractScheme(spec, spec_len, &scheme)) 179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // This is the parsed version of the input URL, we have to canonicalize it 182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // before storing it in our object. 183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool success; 184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (CompareSchemeComponent(spec, scheme, kFileScheme)) { 185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // File URLs are special. 
186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::ParseFileURL(spec, spec_len, &parsed_input); 187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input, 188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, 189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else if (DoIsStandard(spec, scheme)) { 192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // All "normal" URLs. 193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::ParseStandardURL(spec, spec_len, &parsed_input); 194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input, 195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, 196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) { 199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Mailto are treated like a standard url with only a scheme, path, query 200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::ParseMailtoURL(spec, spec_len, &parsed_input); 201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input, 202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // "Weird" URLs like data: and javascript: 206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::ParsePathURL(spec, spec_len, 
&parsed_input); 207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott success = url_canon::CanonicalizePathURL(spec, spec_len, parsed_input, 208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return success; 211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoResolveRelative(const char* base_spec, 215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int base_spec_len, 216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& base_parsed, 217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const CHAR* in_relative, 218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int in_relative_length, 219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* output_parsed) { 222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Remove any whitespace from the middle of the relative URL, possibly 223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // copying to the new buffer. 
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::RawCanonOutputT<CHAR> whitespace_buffer; 225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int relative_length; 226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length, 227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott &whitespace_buffer, 228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott &relative_length); 229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // See if our base URL should be treated as "standard". 231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool standard_base_scheme = 232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott base_parsed.scheme.is_nonempty() && 233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DoIsStandard(base_spec, base_parsed.scheme); 234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool is_relative; 236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Component relative_component; 237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!url_canon::IsRelativeURL(base_spec, base_parsed, 238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott relative, relative_length, 239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott standard_base_scheme, 240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott &is_relative, 241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott &relative_component)) { 242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Error resolving. 
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (is_relative) { 247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Relative, resolve and canonicalize. 248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool file_base_scheme = base_parsed.scheme.is_nonempty() && 249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); 250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return url_canon::ResolveRelativeURL(base_spec, base_parsed, 251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott file_base_scheme, relative, 252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott relative_component, charset_converter, 253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Not relative, canonicalize the input. 
257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoCanonicalize(relative, relative_length, charset_converter, 258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoReplaceComponents(const char* spec, 263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& parsed, 265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_canon::Replacements<CHAR>& replacements, 266c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 267c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 268c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* out_parsed) { 269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // If the scheme is overridden, just do a simple string substitution and 270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // reparse the whole thing. There are lots of edge cases that we really don't 271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // want to deal with. Like what happens if I replace "http://e:8080/foo" 272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // with a file. Does it become "file:///E:/8080/foo" where the port number 273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // becomes part of the path? Parsing that string as a file URL says "yes" 274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // but almost no sane rule for dealing with the components individually would 275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // come up with that. 
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Why allow these crazy cases at all? Programatically, there is almost no 278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // case for replacing the scheme. The most common case for hitting this is 279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // in JS when building up a URL using the location object. In this case, the 280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // JS code expects the string substitution behavior: 281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3 282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (replacements.IsSchemeOverridden()) { 283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Canonicalize the new scheme so it is 8-bit and can be concatenated with 284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the existing spec. 285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::RawCanonOutput<128> scheme_replaced; 286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::Component scheme_replaced_parsed; 287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::CanonicalizeScheme( 288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch replacements.sources().scheme, 289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch replacements.components().scheme, 290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &scheme_replaced, &scheme_replaced_parsed); 291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We can assume that the input is canonicalized, which means it always has 293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // a colon after the scheme (or where the scheme would be). 294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int spec_after_colon = parsed.scheme.is_valid() ? 
parsed.scheme.end() + 1 295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch : 1; 296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (spec_len - spec_after_colon > 0) { 297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch scheme_replaced.Append(&spec[spec_after_colon], 298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch spec_len - spec_after_colon); 299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We now need to completely re-parse the resulting string since its meaning 302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // may have changed with the different scheme. 303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::RawCanonOutput<128> recanonicalized; 304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_parse::Parsed recanonicalized_parsed; 305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), 306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch charset_converter, 307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &recanonicalized, &recanonicalized_parsed); 308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Recurse using the version with the scheme already replaced. This will now 310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // use the replacement rules for the new scheme. 311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Warning: this code assumes that ReplaceComponents will re-check all 313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // components for validity. 
This is because we can't fail if DoCanonicalize 314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // failed above since theoretically the thing making it fail could be 315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // getting replaced here. If ReplaceComponents didn't re-check everything, 316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // we wouldn't know if something *not* getting replaced is a problem. 317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // If the scheme-specific replacers are made more intelligent so they don't 318c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // re-check everything, we should instead recanonicalize the whole thing 319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // after this call to check validity (this assumes replacing the scheme is 320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // much much less common than other types of replacements, like clearing the 321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // ref). 322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::Replacements<CHAR> replacements_no_scheme = replacements; 323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch replacements_no_scheme.SetScheme(NULL, url_parse::Component()); 324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(), 325c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch recanonicalized_parsed, replacements_no_scheme, 326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch charset_converter, output, out_parsed); 327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 328c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // If we get here, then we know the scheme doesn't need to be replaced, so can 330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // just key off the scheme in the spec to know how to do the replacements. 
331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) { 332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return url_canon::ReplaceFileURL(spec, parsed, replacements, 333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, output, out_parsed); 334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (DoIsStandard(spec, parsed.scheme)) { 336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return url_canon::ReplaceStandardURL(spec, parsed, replacements, 337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, output, out_parsed); 338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) { 340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return url_canon::ReplaceMailtoURL(spec, parsed, replacements, 341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, out_parsed); 342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 344c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Default is a path URL. 
345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return url_canon::ReplacePathURL(spec, parsed, replacements, 346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, out_parsed); 347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace 350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 351c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Initialize() { 352c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch InitStandardSchemes(); 353c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 354c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 355c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Shutdown() { 356c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (standard_schemes) { 357c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch delete standard_schemes; 358c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch standard_schemes = NULL; 359c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 360c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 361c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid AddStandardScheme(const char* new_scheme) { 363c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // If this assert triggers, it means you've called AddStandardScheme after 364c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // LockStandardSchemes have been called (see the header file for 365c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // LockStandardSchemes for more). 366c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 367c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This normally means you're trying to set up a new standard scheme too late 368c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // in your application's init process. 
Locate where your app does this 369c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // initialization and calls LockStandardScheme, and add your new standard 370c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // scheme there. 371c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(!standard_schemes_locked) << 372c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "Trying to add a standard scheme after the list has been locked."; 373c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t scheme_len = strlen(new_scheme); 375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (scheme_len == 0) 376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Dulicate the scheme into a new buffer and add it to the list of standard 379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // schemes. This pointer will be leaked on shutdown. 
380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott char* dup_scheme = new char[scheme_len + 1]; 381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott memcpy(dup_scheme, new_scheme, scheme_len + 1); 382c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 383c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott InitStandardSchemes(); 384c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott standard_schemes->push_back(dup_scheme); 385c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 386c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 387c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid LockStandardSchemes() { 388c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch standard_schemes_locked = true; 389c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 390c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 391c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsStandard(const char* spec, const url_parse::Component& scheme) { 392c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return DoIsStandard(spec, scheme); 393c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 394c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 395c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool IsStandard(const char16* spec, const url_parse::Component& scheme) { 396c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return DoIsStandard(spec, scheme); 397c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 398c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 399c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool FindAndCompareScheme(const char* str, 400c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int str_len, 401c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* compare, 402c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Component* found_scheme) { 403c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoFindAndCompareScheme(str, str_len, compare, found_scheme); 
404c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 405c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 406c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool FindAndCompareScheme(const char16* str, 407c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int str_len, 408c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* compare, 409c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Component* found_scheme) { 410c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoFindAndCompareScheme(str, str_len, compare, found_scheme); 411c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 412c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 413c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool Canonicalize(const char* spec, 414c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 415c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 416c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 417c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* output_parsed) { 418c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoCanonicalize(spec, spec_len, charset_converter, 419c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 420c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 421c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 422c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool Canonicalize(const char16* spec, 423c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 424c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 425c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 426c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* output_parsed) { 427c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoCanonicalize(spec, 
spec_len, charset_converter, 428c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, output_parsed); 429c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 430c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 431c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ResolveRelative(const char* base_spec, 432c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int base_spec_len, 433c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& base_parsed, 434c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* relative, 435c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int relative_length, 436c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 437c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 438c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* output_parsed) { 439c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoResolveRelative(base_spec, base_spec_len, base_parsed, 440c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott relative, relative_length, 441c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, output, output_parsed); 442c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 443c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 444c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ResolveRelative(const char* base_spec, 445c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int base_spec_len, 446c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& base_parsed, 447c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char16* relative, 448c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int relative_length, 449c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 450c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 
451c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* output_parsed) { 452c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoResolveRelative(base_spec, base_spec_len, base_parsed, 453c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott relative, relative_length, 454c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, output, output_parsed); 455c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 456c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 457c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplaceComponents(const char* spec, 458c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 459c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& parsed, 460c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_canon::Replacements<char>& replacements, 461c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 462c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 463c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* out_parsed) { 464c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoReplaceComponents(spec, spec_len, parsed, replacements, 465c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, output, out_parsed); 466c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 467c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 468c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplaceComponents(const char* spec, 469c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 470c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& parsed, 471c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_canon::Replacements<char16>& replacements, 472c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CharsetConverter* charset_converter, 
473c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_canon::CanonOutput* output, 474c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* out_parsed) { 475c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoReplaceComponents(spec, spec_len, parsed, replacements, 476c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset_converter, output, out_parsed); 477c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 478c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 479c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Front-ends for LowerCaseEqualsASCII. 480c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool LowerCaseEqualsASCII(const char* a_begin, 481c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* a_end, 482c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* b) { 483c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoLowerCaseEqualsASCII(a_begin, a_end, b); 484c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 485c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 486c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool LowerCaseEqualsASCII(const char* a_begin, 487c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* a_end, 488c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* b_begin, 489c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* b_end) { 490c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott while (a_begin != a_end && b_begin != b_end && 491c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ToLowerASCII(*a_begin) == *b_begin) { 492c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott a_begin++; 493c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott b_begin++; 494c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 495c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return a_begin == a_end && b_begin == b_end; 496c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 
497c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 498c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool LowerCaseEqualsASCII(const char16* a_begin, 499c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char16* a_end, 500c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* b) { 501c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoLowerCaseEqualsASCII(a_begin, a_end, b); 502c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 503c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 504c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid DecodeURLEscapeSequences(const char* input, int length, 505c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::CanonOutputW* output) { 506c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::RawCanonOutputT<char> unescaped_chars; 507c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (int i = 0; i < length; i++) { 508c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (input[i] == '%') { 509c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unsigned char ch; 510c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (url_canon::DecodeEscaped(input, &i, length, &ch)) { 511c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unescaped_chars.push_back(ch); 512c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 513c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Invalid escape sequence, copy the percent literal. 514c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unescaped_chars.push_back('%'); 515c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 516c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 517c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Regular non-escaped 8-bit character. 
518c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unescaped_chars.push_back(input[i]); 519c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 520c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 521c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 522c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Convert that 8-bit to UTF-16. It's not clear IE does this at all to 523c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // JavaScript URLs, but Firefox and Safari do. 524c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (int i = 0; i < unescaped_chars.length(); i++) { 525c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i)); 526c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (uch < 0x80) { 527c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Non-UTF-8, just append directly 528c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output->push_back(uch); 529c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 530c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // next_ch will point to the last character of the decoded 531c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // character. 532c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int next_character = i; 533c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unsigned code_point; 534c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (url_canon::ReadUTFChar(unescaped_chars.data(), &next_character, 535c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unescaped_chars.length(), &code_point)) { 536c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Valid UTF-8 character, convert to UTF-16. 
537c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_canon::AppendUTF16Value(code_point, output); 538c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i = next_character; 539c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 540c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // If there are any sequences that are not valid UTF-8, we keep 541c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // invalid code points and promote to UTF-16. We copy all characters 542c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // from the current position to the end of the identified sequence. 543c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (i < next_character) { 544c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); 545c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i++; 546c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 547c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); 548c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 549c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 550c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 551c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 552c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 553c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace url_util 554