// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Functions for canonicalizing "path" URLs. Not to be confused with the path
// of a URL, these are URLs that have no authority section, only a path. For
// example, "javascript:" and "data:".
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_canon.h" 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_canon_internal.h" 36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace url_canon { 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace { 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR, typename UCHAR> 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& parsed, 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CanonOutput* output, 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* new_parsed) { 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Scheme: this will append the colon. 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool success = CanonicalizeScheme(source.scheme, parsed.scheme, 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output, &new_parsed->scheme); 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We assume there's no authority for path URLs. Note that hosts should never 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // have -1 length. 
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->username.reset(); 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->password.reset(); 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->host.reset(); 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->port.reset(); 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (parsed.path.is_valid()) { 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Copy the path using path URL's more lax escaping rules (think for 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // ASCII characters alone. This helps readability of JavaStript. 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->path.begin = output->length(); 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int end = parsed.path.end(); 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = parsed.path.begin; i < end; i++) { 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UCHAR uch = static_cast<UCHAR>(source.path[i]); 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (uch < 0x20 || uch >= 0x80) 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott success &= AppendUTF8EscapedChar(source.path, &i, end, output); 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott else 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output->push_back(static_cast<char>(uch)); 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->path.len = output->length() - new_parsed->path.begin; 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Empty path. 
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->path.reset(); 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Assume there's no query or ref. 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->query.reset(); 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_parsed->ref.reset(); 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return success; 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool CanonicalizePathURL(const char* spec, 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& parsed, 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CanonOutput* output, 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* new_parsed) { 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoCanonicalizePathURL<char, unsigned char>( 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott URLComponentSource<char>(spec), parsed, output, new_parsed); 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool CanonicalizePathURL(const char16* spec, 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& parsed, 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CanonOutput* output, 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* 
new_parsed) { 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoCanonicalizePathURL<char16, char16>( 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott URLComponentSource<char16>(spec), parsed, output, new_parsed); 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplacePathURL(const char* base, 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& base_parsed, 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Replacements<char>& replacements, 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CanonOutput* output, 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* new_parsed) { 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott URLComponentSource<char> source(base); 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed parsed(base_parsed); 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott SetupOverrideComponents(base, replacements, &source, &parsed); 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoCanonicalizePathURL<char, unsigned char>( 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott source, parsed, output, new_parsed); 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ReplacePathURL(const char* base, 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const url_parse::Parsed& base_parsed, 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Replacements<char16>& replacements, 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CanonOutput* output, 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed* new_parsed) { 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RawCanonOutput<1024> utf8; 
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott URLComponentSource<char> source(base); 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott url_parse::Parsed parsed(base_parsed); 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoCanonicalizePathURL<char, unsigned char>( 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott source, parsed, output, new_parsed); 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace url_canon 129