1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott/* Based on nsURLParsers.cc from Mozilla 2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * ------------------------------------- 3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The contents of this file are subject to the Mozilla Public License Version 4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 1.1 (the "License"); you may not use this file except in compliance with 5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the License. You may obtain a copy of the License at 6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * http://www.mozilla.org/MPL/ 7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Software distributed under the License is distributed on an "AS IS" basis, 9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * for the specific language governing rights and limitations under the 11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * License. 12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The Original Code is mozilla.org code. 14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The Initial Developer of the Original Code is 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Netscape Communications Corporation. 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Portions created by the Initial Developer are Copyright (C) 1998 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the Initial Developer. All Rights Reserved. 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Contributor(s): 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Darin Fisher (original author) 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Alternatively, the contents of this file may be used under the terms of 24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * either the GNU General Public License Version 2 or later (the "GPL"), or 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * in which case the provisions of the GPL or the LGPL are applicable instead 27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * of those above. If you wish to allow use of your version of this file only 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * under the terms of either the GPL or the LGPL, and not to allow others to 29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * use your version of this file under the terms of the MPL, indicate your 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * decision by deleting the provisions above and replace them with the notice 31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * and other provisions required by the GPL or the LGPL. If you do not delete 32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the provisions above, a recipient may use your version of this file under 33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the terms of any one of the MPL, the GPL or the LGPL. 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * ***** END LICENSE BLOCK ***** */ 36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_parse.h" 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stdlib.h> 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/logging.h" 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_parse_internal.h" 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace url_parse { 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace { 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Returns true if the given character is a valid digit to use in a port. 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottinline bool IsPortDigit(char16 ch) { 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return ch >= '0' && ch <= '9'; 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Returns the offset of the next authority terminator in the input starting 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// from start_offset. If no terminator is found, the return value will be equal 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// to spec_len. 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint FindNextAuthorityTerminator(const CHAR* spec, 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int start_offset, 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len) { 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = start_offset; i < spec_len; i++) { 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (IsAuthorityTerminator(spec[i])) 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return i; 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return spec_len; // Not found. 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseUserInfo(const CHAR* spec, 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& user, 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* username, 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* password) { 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Find the first colon in the user section, which separates the username and 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // password. 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int colon_offset = 0; 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott while (colon_offset < user.len && spec[user.begin + colon_offset] != ':') 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott colon_offset++; 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (colon_offset < user.len) { 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Found separator: <username>:<password> 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *username = Component(user.begin, colon_offset); 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *password = MakeRange(user.begin + colon_offset + 1, 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott user.begin + user.len); 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No separator, treat everything as the username 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *username = user; 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *password = Component(); 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseServerInfo(const CHAR* spec, 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& serverinfo, 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* hostname, 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* port_num) { 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (serverinfo.len == 0) { 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No server info, host name is empty. 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott hostname->reset(); 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott port_num->reset(); 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If the host starts with a left-bracket, assume the entire host is an 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal. 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // This assumption will be overridden if we find a right-bracket. 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Our IPv6 address canonicalization code requires both brackets to exist, 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // but the ability to locate an incomplete address can still be useful. 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1; 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int colon = -1; 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Find the last right-bracket, and the last colon. 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = serverinfo.begin; i < serverinfo.end(); i++) { 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott switch (spec[i]) { 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case ']': 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ipv6_terminator = i; 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case ':': 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott colon = i; 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (colon > ipv6_terminator) { 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Found a port number: <hostname>:<port> 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *hostname = MakeRange(serverinfo.begin, colon); 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (hostname->len == 0) 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott hostname->reset(); 128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *port_num = MakeRange(colon + 1, serverinfo.end()); 129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No port: <hostname> 131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *hostname = serverinfo; 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott port_num->reset(); 133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Given an already-identified auth section, breaks it into its consituent 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// parts. The port number will be parsed and the resulting integer will be 138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// filled into the given *port variable, or -1 if there is no port number or it 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// is invalid. 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParseAuthority(const CHAR* spec, 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& auth, 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* username, 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* password, 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* hostname, 146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* port_num) { 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DCHECK(auth.is_valid()) << "We should always get an authority"; 148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (auth.len == 0) { 149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott username->reset(); 150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott password->reset(); 151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott hostname->reset(); 152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott port_num->reset(); 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Search backwards for @, which is the separator between the user info and 157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // the server info. 158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int i = auth.begin + auth.len - 1; 159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott while (i > auth.begin && spec[i] != '@') 160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott i--; 161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (spec[i] == '@') { 163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Found user info: <user-info>@<server-info> 164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ParseUserInfo(spec, Component(auth.begin, i - auth.begin), 165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott username, password); 166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), 167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott hostname, port_num); 168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No user info, everything is server info. 170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott username->reset(); 171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott password->reset(); 172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ParseServerInfo(spec, auth, hostname, port_num); 173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePath(const CHAR* spec, 178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& path, 179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* filepath, 180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* query, 181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* ref) { 182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref> 183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Special case when there is no path. 185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (path.len == -1) { 186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott filepath->reset(); 187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott query->reset(); 188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ref->reset(); 189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DCHECK(path.len > 0) << "We should never have 0 length paths"; 192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Search for first occurrence of either ? or #. 194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int path_end = path.begin + path.len; 195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int query_separator = -1; // Index of the '?' 197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int ref_separator = -1; // Index of the '#' 198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = path.begin; i < path_end; i++) { 199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott switch (spec[i]) { 200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case '?': 201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Only match the query string if it precedes the reference fragment 202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // and when we haven't found one already. 203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ref_separator < 0 && query_separator < 0) 204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott query_separator = i; 205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case '#': 207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Record the first # sign only. 208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ref_separator < 0) 209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ref_separator = i; 210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Markers pointing to the character after each of these corresponding 215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // components. The code below words from the end back to the beginning, 216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // and will update these indices as it finds components that exist. 217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int file_end, query_end; 218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Ref fragment: from the # to the end of the path. 220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ref_separator >= 0) { 221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott file_end = query_end = ref_separator; 222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *ref = MakeRange(ref_separator + 1, path_end); 223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott file_end = query_end = path_end; 225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ref->reset(); 226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Query fragment: everything from the ? to the next boundary (either the end 229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // of the path or the ref fragment). 230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (query_separator >= 0) { 231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott file_end = query_separator; 232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *query = MakeRange(query_separator + 1, query_end); 233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott query->reset(); 235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // File path: treat an empty file path as no file path. 238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (file_end != path.begin) 239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *filepath = MakeRange(path.begin, file_end); 240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott else 241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott filepath->reset(); 242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoExtractScheme(const CHAR* url, 246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int url_len, 247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* scheme) { 248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Skip leading whitespace and control characters. 249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int begin = 0; 250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott while (begin < url_len && ShouldTrimFromURL(url[begin])) 251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott begin++; 252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (begin == url_len) 253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; // Input is empty or all whitespace. 254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Find the first colon character. 256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = begin; i < url_len; i++) { 257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (url[i] == ':') { 258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *scheme = MakeRange(begin, i); 259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return true; 260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; // No colon found: no scheme 263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Fills in all members of the Parsed structure except for the scheme. 266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// 267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// |spec| is the full spec being parsed, of length |spec_len|. 268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// |after_scheme| is the character immediately following the scheme (after the 269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// colon) where we'll begin parsing. 270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// 271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Compatability data points. I list "host", "path" extracted: 272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Input IE6 Firefox Us 273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// ----- -------------- -------------- -------------- 274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" 275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" 276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/" 277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/" 278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" 279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// 280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// (*) Interestingly, although IE fails to load these URLs, its history 281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// canonicalizer handles them, meaning if you've been to the corresponding 282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// "http://foo.com/" link, it will be colored. 283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochtemplate <typename CHAR> 284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid DoParseAfterScheme(const CHAR* spec, 285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int spec_len, 286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int after_scheme, 287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Parsed* parsed) { 288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); 289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int after_slashes = after_scheme + num_slashes; 290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // First split into two main parts, the authority (username, password, host, 292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // and port) and the full path (path, query, and reference). 293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Component authority; 294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Component full_path; 295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Found "//<some data>", looks like an authority section. Treat everything 297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // from there to the next slash (or end of spec) to be the authority. Note 298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // that we ignore the number of slashes and treat it as the authority. 299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len); 300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch authority = Component(after_slashes, end_auth - after_slashes); 301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (end_auth == spec_len) // No beginning of path found. 303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch full_path = Component(); 304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch else // Everything starting from the slash to the end is the path. 305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch full_path = Component(end_auth, spec_len - end_auth); 306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Now parse those two sub-parts. 308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DoParseAuthority(spec, authority, &parsed->username, &parsed->password, 309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &parsed->host, &parsed->port); 310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref); 311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 313c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The main parsing function for standard URLs. Standard URLs have a scheme, 314c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// host, path, etc. 315c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 316c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) { 317c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DCHECK(spec_len >= 0); 318c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 319c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Strip leading & trailing spaces and control characters. 320c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int begin = 0; 321c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott TrimURL(spec, &begin, &spec_len); 322c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 323c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int after_scheme; 324c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (DoExtractScheme(spec, spec_len, &parsed->scheme)) { 325c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott after_scheme = parsed->scheme.end() + 1; // Skip past the colon. 326c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 3273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Say there's no scheme when there is no colon. We could also say that 328c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // everything is the scheme. Both would produce an invalid URL, but this way 329c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // seems less wrong in more cases. 330c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->scheme.reset(); 331c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott after_scheme = begin; 332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseAfterScheme(spec, spec_len, after_scheme, parsed); 334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 335c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Initializes a path URL which is merely a scheme followed by a path. Examples 337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// include "about:foo" and "javascript:alert('bar');" 338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 339c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) { 340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Get the non-path and non-scheme parts of the URL out of the way, we never 341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // use them. 342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->username.reset(); 343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->password.reset(); 344c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->host.reset(); 345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->port.reset(); 346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->query.reset(); 347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->ref.reset(); 348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Strip leading & trailing spaces and control characters. 350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int begin = 0; 351c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott TrimURL(spec, &begin, &spec_len); 352c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 353c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Handle empty specs or ones that contain only whitespace or control chars. 354c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (begin == spec_len) { 355c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->scheme.reset(); 356c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->path.reset(); 357c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 358c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 359c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 360c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Extract the scheme, with the path being everything following. We also 361c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // handle the case where there is no scheme. 362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 363c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Offset the results since we gave ExtractScheme a substring. 364c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->scheme.begin += begin; 365c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 366c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // For compatability with the standard URL parser, we treat no path as 367c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // -1, rather than having a length of 0 (we normally wouldn't care so 368c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // much for these non-standard URLs). 369c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (parsed->scheme.end() == spec_len - 1) 370c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->path.reset(); 371c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott else 372c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len); 373c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No scheme found, just path. 375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->scheme.reset(); 376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->path = MakeRange(begin, spec_len); 377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) { 382c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DCHECK(spec_len >= 0); 383c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 384c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Get the non-path and non-scheme parts of the URL out of the way, we never 385c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // use them. 386c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->username.reset(); 387c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->password.reset(); 388c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->host.reset(); 389c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->port.reset(); 390c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->ref.reset(); 391c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->query.reset(); // May use this; reset for convenience. 392c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 393c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Strip leading & trailing spaces and control characters. 394c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int begin = 0; 395c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott TrimURL(spec, &begin, &spec_len); 396c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 397c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Handle empty specs or ones that contain only whitespace or control chars. 398c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (begin == spec_len) { 399c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->scheme.reset(); 400c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->path.reset(); 401c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 402c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 403c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 404c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int path_begin = -1; 405c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int path_end = -1; 406c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 407c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Extract the scheme, with the path being everything following. We also 408c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // handle the case where there is no scheme. 409c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 410c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Offset the results since we gave ExtractScheme a substring. 411c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->scheme.begin += begin; 412c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 413c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (parsed->scheme.end() != spec_len - 1) { 414c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott path_begin = parsed->scheme.end() + 1; 415c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott path_end = spec_len; 416c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 417c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 418c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No scheme found, just path. 419c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->scheme.reset(); 420c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott path_begin = begin; 421c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott path_end = spec_len; 422c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 423c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 424c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Split [path_begin, path_end) into a path + query. 425c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = path_begin; i < path_end; ++i) { 426c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (spec[i] == '?') { 427c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->query = MakeRange(i + 1, path_end); 428c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott path_end = i; 429c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 430c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 431c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 432c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 433c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // For compatability with the standard URL parser, treat no path as 434c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // -1, rather than having a length of 0 435c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (path_begin == path_end) { 436c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->path.reset(); 437c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 438c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott parsed->path = MakeRange(path_begin, path_end); 439c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 440c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 441c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 442c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Converts a port number in a string to an integer. We'd like to just call 443c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// sscanf but our input is not NULL-terminated, which sscanf requires. Instead, 444c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// we copy the digits to a small stack buffer (since we know the maximum number 445c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// of digits in a valid port number) that we can NULL terminate. 446c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 447c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint DoParsePort(const CHAR* spec, const Component& component) { 448c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Easy success case when there is no port. 449c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const int kMaxDigits = 5; 450c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!component.is_nonempty()) 451c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return PORT_UNSPECIFIED; 452c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 453c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Skip over any leading 0s. 454c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component digits_comp(component.end(), 0); 455c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = 0; i < component.len; i++) { 456c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (spec[component.begin + i] != '0') { 457c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott digits_comp = MakeRange(component.begin + i, component.end()); 458c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 459c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 460c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 461c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (digits_comp.len == 0) 462c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return 0; // All digits were 0. 463c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 464c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Verify we don't have too many digits (we'll be copying to our buffer so 465c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // we need to double-check). 466c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (digits_comp.len > kMaxDigits) 467c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return PORT_INVALID; 468c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 469c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Copy valid digits to the buffer. 470c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott char digits[kMaxDigits + 1]; // +1 for null terminator 471c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = 0; i < digits_comp.len; i++) { 472c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CHAR ch = spec[digits_comp.begin + i]; 473c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!IsPortDigit(ch)) { 474c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Invalid port digit, fail. 475c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return PORT_INVALID; 476c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 477c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott digits[i] = static_cast<char>(ch); 478c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 479c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 480c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Null-terminate the string and convert to integer. Since we guarantee 481c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // only digits, atoi's lack of error handling is OK. 482c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott digits[digits_comp.len] = 0; 483c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int port = atoi(digits); 484c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (port > 65535) 485c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return PORT_INVALID; // Out of range. 486c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return port; 487c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 488c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 489c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 490c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoExtractFileName(const CHAR* spec, 491c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& path, 492c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* file_name) { 493c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Handle empty paths: they have no file names. 494c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!path.is_nonempty()) { 495c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott file_name->reset(); 496c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 497c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 498c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 499c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Search backwards for a parameter, which is a normally unused field in a 500c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // URL delimited by a semicolon. We parse the parameter as part of the 501c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // path, but here, we don't want to count it. The last semicolon is the 502c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // parameter. The path should start with a slash, so we don't need to check 503c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // the first one. 504c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int file_end = path.end(); 505c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = path.end() - 1; i > path.begin; i--) { 506c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (spec[i] == ';') { 507c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott file_end = i; 508c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 509c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 510c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 511c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 512c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Now search backwards from the filename end to the previous slash 513c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // to find the beginning of the filename. 514c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = file_end - 1; i >= path.begin; i--) { 515c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (IsURLSlash(spec[i])) { 516c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // File name is everything following this character to the end 517c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *file_name = MakeRange(i + 1, file_end); 518c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 519c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 520c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 521c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 522c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No slash found, this means the input was degenerate (generally paths 523c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // will start with a slash). Let's call everything the file name. 524c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *file_name = MakeRange(path.begin, file_end); 525c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return; 526c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 527c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 528c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR> 529c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoExtractQueryKeyValue(const CHAR* spec, 530c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* query, 531c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* key, 532c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* value) { 533c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!query->is_nonempty()) 534c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 535c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 536c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int start = query->begin; 537c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int cur = start; 538c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int end = query->end(); 539c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 540c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We assume the beginning of the input is the beginning of the "key" and we 541c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // skip to the end of it. 542c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott key->begin = cur; 543c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott while (cur < end && spec[cur] != '&' && spec[cur] != '=') 544c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur++; 545c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott key->len = cur - key->begin; 546c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 547c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Skip the separator after the key (if any). 548c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (cur < end && spec[cur] == '=') 549c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur++; 550c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 551c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Find the value part. 552c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott value->begin = cur; 553c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott while (cur < end && spec[cur] != '&') 554c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur++; 555c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott value->len = cur - value->begin; 556c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 557c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Finally skip the next separator if any 558c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (cur < end && spec[cur] == '&') 559c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur++; 560c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 561c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Save the new query 562c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *query = url_parse::MakeRange(cur, end); 563c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return true; 564c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 565c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 566c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace 567c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 568513209b27ff55e2841eac0e4120199c23acce758Ben MurdochParsed::Parsed() { 569513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch} 570513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch 571c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint Parsed::Length() const { 572c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ref.is_valid()) 573c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return ref.end(); 574c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return CountCharactersBefore(REF, false); 575c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 576c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 577c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint Parsed::CountCharactersBefore(ComponentType type, 578c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool include_delimiter) const { 579c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type == SCHEME) 580c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return scheme.begin; 581c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 582c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // There will be some characters after the scheme like "://" and we don't 583c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // know how many. Search forwards for the next thing until we find one. 584c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int cur = 0; 585c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (scheme.is_valid()) 586c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme. 587c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 588c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (username.is_valid()) { 589c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type <= USERNAME) 590c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return username.begin; 591c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur = username.end() + 1; // Advance over the '@' or ':' at the end. 592c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 593c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 594c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (password.is_valid()) { 595c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type <= PASSWORD) 596c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return password.begin; 597c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur = password.end() + 1; // Advance over the '@' at the end. 598c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 599c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 600c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (host.is_valid()) { 601c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type <= HOST) 602c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return host.begin; 603c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur = host.end(); 604c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 605c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 606c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (port.is_valid()) { 607c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type < PORT || (type == PORT && include_delimiter)) 608c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return port.begin - 1; // Back over delimiter. 609c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type == PORT) 610c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return port.begin; // Don't want delimiter counted. 611c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur = port.end(); 612c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 613c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 614c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (path.is_valid()) { 615c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type <= PATH) 616c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return path.begin; 617c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur = path.end(); 618c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 619c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 620c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (query.is_valid()) { 621c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type < QUERY || (type == QUERY && include_delimiter)) 622c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return query.begin - 1; // Back over delimiter. 623c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type == QUERY) 624c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return query.begin; // Don't want delimiter counted. 625c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott cur = query.end(); 626c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 627c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 628c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ref.is_valid()) { 629c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (type == REF && !include_delimiter) 630c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return ref.begin; // Back over delimiter. 631c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 632c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // When there is a ref and we get here, the component we wanted was before 633c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // this and not found, so we always know the beginning of the ref is right. 634c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return ref.begin - 1; // Don't want delimiter counted. 635c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 636c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 637c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return cur; 638c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 639c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 640c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractScheme(const char* url, int url_len, Component* scheme) { 641c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoExtractScheme(url, url_len, scheme); 642c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 643c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 644c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractScheme(const char16* url, int url_len, Component* scheme) { 645c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoExtractScheme(url, url_len, scheme); 646c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 647c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 648c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This handles everything that may be an authority terminator, including 649c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// backslash. For special backslash handling see DoParseAfterScheme. 650c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool IsAuthorityTerminator(char16 ch) { 6513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return IsURLSlash(ch) || ch == '?' || ch == '#'; 652c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 653c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 654c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ExtractFileName(const char* url, 655c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& path, 656c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* file_name) { 657c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoExtractFileName(url, path, file_name); 658c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 659c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 660c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ExtractFileName(const char16* url, 661c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& path, 662c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* file_name) { 663c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoExtractFileName(url, path, file_name); 664c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 665c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 666c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractQueryKeyValue(const char* url, 667c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* query, 668c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* key, 669c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* value) { 670c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoExtractQueryKeyValue(url, query, key, value); 671c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 672c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 673c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractQueryKeyValue(const char16* url, 674c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* query, 675c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* key, 676c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* value) { 677c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoExtractQueryKeyValue(url, query, key, value); 678c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 679c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 680c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseAuthority(const char* spec, 681c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& auth, 682c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* username, 683c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* password, 684c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* hostname, 685c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* port_num) { 686c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseAuthority(spec, auth, username, password, hostname, port_num); 687c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 688c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 689c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid ParseAuthority(const char16* spec, 690c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& auth, 691c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* username, 692c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* password, 693c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* hostname, 694c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* port_num) { 695c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseAuthority(spec, auth, username, password, hostname, port_num); 696c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 697c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 698c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint ParsePort(const char* url, const Component& port) { 699c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoParsePort(url, port); 700c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 701c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 702c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint ParsePort(const char16* url, const Component& port) { 703c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return DoParsePort(url, port); 704c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 705c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 706c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseStandardURL(const char* url, int url_len, Parsed* parsed) { 707c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseStandardURL(url, url_len, parsed); 708c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 709c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 710c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseStandardURL(const char16* url, int url_len, Parsed* parsed) { 711c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseStandardURL(url, url_len, parsed); 712c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 713c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 714c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathURL(const char* url, int url_len, Parsed* parsed) { 715c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParsePathURL(url, url_len, parsed); 716c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 717c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 718c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathURL(const char16* url, int url_len, Parsed* parsed) { 719c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParsePathURL(url, url_len, parsed); 720c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 721c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 722c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseMailtoURL(const char* url, int url_len, Parsed* parsed) { 723c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseMailtoURL(url, url_len, parsed); 724c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 725c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 726c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseMailtoURL(const char16* url, int url_len, Parsed* parsed) { 727c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseMailtoURL(url, url_len, parsed); 728c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 729c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 730c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathInternal(const char* spec, 731c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& path, 732c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* filepath, 733c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* query, 734c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* ref) { 735c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ParsePath(spec, path, filepath, query, ref); 736c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 737c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 738c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathInternal(const char16* spec, 739c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const Component& path, 740c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* filepath, 741c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* query, 742c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Component* ref) { 743c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ParsePath(spec, path, filepath, query, ref); 744c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 745c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 746c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseAfterScheme(const char* spec, 747c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 748c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int after_scheme, 749c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Parsed* parsed) { 750c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseAfterScheme(spec, spec_len, after_scheme, parsed); 751c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 752c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 753c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseAfterScheme(const char16* spec, 754c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int spec_len, 755c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int after_scheme, 756c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Parsed* parsed) { 757c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DoParseAfterScheme(spec, spec_len, after_scheme, parsed); 758c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 759c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 760c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace url_parse 761