/* Based on nsURLParsers.cc from Mozilla
 * -------------------------------------
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Darin Fisher (original author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * 35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * ***** END LICENSE BLOCK ***** */ 36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 373d4dfb6f11fb4e934d658743a8efc26d5490fdb0Ben Murdoch#include "url/third_party/mozilla/url_parse.h" 38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include <stdlib.h> 40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/logging.h" 42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_parse_internal.h" 43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_util.h" 44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_util_internal.h" 45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace url_parse { 47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace { 49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Returns true if the given character is a valid digit to use in a port. 517d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)inline bool IsPortDigit(base::char16 ch) { 52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return ch >= '0' && ch <= '9'; 53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Returns the offset of the next authority terminator in the input starting 56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// from start_offset. 
If no terminator is found, the return value will be equal 57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// to spec_len. 58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int FindNextAuthorityTerminator(const CHAR* spec, 60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int start_offset, 61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int spec_len) { 62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = start_offset; i < spec_len; i++) { 63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (IsAuthorityTerminator(spec[i])) 64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return i; 65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return spec_len; // Not found. 67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseUserInfo(const CHAR* spec, 71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& user, 72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* username, 73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* password) { 74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Find the first colon in the user section, which separates the username and 75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // password. 
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int colon_offset = 0; 77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) while (colon_offset < user.len && spec[user.begin + colon_offset] != ':') 78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) colon_offset++; 79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (colon_offset < user.len) { 81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Found separator: <username>:<password> 82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *username = Component(user.begin, colon_offset); 83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *password = MakeRange(user.begin + colon_offset + 1, 84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) user.begin + user.len); 85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No separator, treat everything as the username 87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *username = user; 88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *password = Component(); 89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseServerInfo(const CHAR* spec, 94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& serverinfo, 95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* hostname, 96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* port_num) { 97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (serverinfo.len == 0) { 
98c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No server info, host name is empty. 99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) hostname->reset(); 100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) port_num->reset(); 101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // If the host starts with a left-bracket, assume the entire host is an 105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal. 106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // This assumption will be overridden if we find a right-bracket. 107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // 108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Our IPv6 address canonicalization code requires both brackets to exist, 109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // but the ability to locate an incomplete address can still be useful. 110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1; 111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int colon = -1; 112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Find the last right-bracket, and the last colon. 
114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = serverinfo.begin; i < serverinfo.end(); i++) { 115c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) switch (spec[i]) { 116c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) case ']': 117c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ipv6_terminator = i; 118c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) break; 119c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) case ':': 120c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) colon = i; 121c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) break; 122c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 123c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 124c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 125c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (colon > ipv6_terminator) { 126c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Found a port number: <hostname>:<port> 127c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *hostname = MakeRange(serverinfo.begin, colon); 128c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (hostname->len == 0) 129c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) hostname->reset(); 130c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *port_num = MakeRange(colon + 1, serverinfo.end()); 131c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 132c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No port: <hostname> 133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *hostname = serverinfo; 134c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) port_num->reset(); 135c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 136c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 137c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard 
Coles) 138c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Given an already-identified auth section, breaks it into its consituent 139c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// parts. The port number will be parsed and the resulting integer will be 140c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// filled into the given *port variable, or -1 if there is no port number or it 141c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// is invalid. 142c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 143c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseAuthority(const CHAR* spec, 144c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& auth, 145c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* username, 146c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* password, 147c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* hostname, 148c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* port_num) { 149c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DCHECK(auth.is_valid()) << "We should always get an authority"; 150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (auth.len == 0) { 151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) username->reset(); 152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) password->reset(); 153c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) hostname->reset(); 154c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) port_num->reset(); 155c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 157c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 158c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Search backwards for @, which is the 
separator between the user info and 159c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // the server info. 160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int i = auth.begin + auth.len - 1; 161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) while (i > auth.begin && spec[i] != '@') 162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) i--; 163c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 164c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (spec[i] == '@') { 165c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Found user info: <user-info>@<server-info> 166c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ParseUserInfo(spec, Component(auth.begin, i - auth.begin), 167c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) username, password); 168c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), 169c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) hostname, port_num); 170c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 171c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No user info, everything is server info. 
172c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) username->reset(); 173c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) password->reset(); 174c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ParseServerInfo(spec, auth, hostname, port_num); 175c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 176c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 177c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 178c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParsePath(const CHAR* spec, 180c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& path, 181c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* filepath, 182c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* query, 183c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* ref) { 184c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref> 185c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 186c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Special case when there is no path. 
187c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (path.len == -1) { 188c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) filepath->reset(); 189c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) query->reset(); 190c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ref->reset(); 191c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 192c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 193c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DCHECK(path.len > 0) << "We should never have 0 length paths"; 194c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 195c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Search for first occurrence of either ? or #. 196c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int path_end = path.begin + path.len; 197c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 198c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int query_separator = -1; // Index of the '?' 199c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int ref_separator = -1; // Index of the '#' 200c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = path.begin; i < path_end; i++) { 201c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) switch (spec[i]) { 202c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) case '?': 203c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Only match the query string if it precedes the reference fragment 204c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // and when we haven't found one already. 
205c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ref_separator < 0 && query_separator < 0) 206c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) query_separator = i; 207c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) break; 208c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) case '#': 209c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Record the first # sign only. 210c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ref_separator < 0) 211c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ref_separator = i; 212c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) break; 213c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 214c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 215c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Markers pointing to the character after each of these corresponding 217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // components. The code below words from the end back to the beginning, 218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // and will update these indices as it finds components that exist. 219c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int file_end, query_end; 220c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 221c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Ref fragment: from the # to the end of the path. 
222c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ref_separator >= 0) { 223c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) file_end = query_end = ref_separator; 224c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *ref = MakeRange(ref_separator + 1, path_end); 225c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 226c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) file_end = query_end = path_end; 227c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ref->reset(); 228c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 229c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 230c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Query fragment: everything from the ? to the next boundary (either the end 231c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // of the path or the ref fragment). 232c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (query_separator >= 0) { 233c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) file_end = query_separator; 234c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *query = MakeRange(query_separator + 1, query_end); 235c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 236c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) query->reset(); 237c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 238c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 239c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // File path: treat an empty file path as no file path. 
240c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (file_end != path.begin) 241c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *filepath = MakeRange(path.begin, file_end); 242c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else 243c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) filepath->reset(); 244c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 245c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 246c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 247c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoExtractScheme(const CHAR* url, 248c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int url_len, 249c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* scheme) { 250c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Skip leading whitespace and control characters. 251c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int begin = 0; 252c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) while (begin < url_len && ShouldTrimFromURL(url[begin])) 253c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) begin++; 254c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (begin == url_len) 255c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; // Input is empty or all whitespace. 256c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 257c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Find the first colon character. 
258c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = begin; i < url_len; i++) { 259c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (url[i] == ':') { 260c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *scheme = MakeRange(begin, i); 261c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return true; 262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; // No colon found: no scheme 265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Fills in all members of the Parsed structure except for the scheme. 268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// |spec| is the full spec being parsed, of length |spec_len|. 270c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// |after_scheme| is the character immediately following the scheme (after the 271c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// colon) where we'll begin parsing. 272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 273c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Compatability data points. 
I list "host", "path" extracted: 274c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Input IE6 Firefox Us 275c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// ----- -------------- -------------- -------------- 276c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" 277c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" 278c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/" 279c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/" 280c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" 281c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 282c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// (*) Interestingly, although IE fails to load these URLs, its history 283c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// canonicalizer handles them, meaning if you've been to the corresponding 284c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// "http://foo.com/" link, it will be colored. 
285c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template <typename CHAR> 286c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseAfterScheme(const CHAR* spec, 287c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int spec_len, 288c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int after_scheme, 289c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Parsed* parsed) { 290c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); 291c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int after_slashes = after_scheme + num_slashes; 292c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 293c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // First split into two main parts, the authority (username, password, host, 294c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // and port) and the full path (path, query, and reference). 295c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component authority; 296c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component full_path; 297c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 298c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Found "//<some data>", looks like an authority section. Treat everything 299c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // from there to the next slash (or end of spec) to be the authority. Note 300c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // that we ignore the number of slashes and treat it as the authority. 
301c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len); 302c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) authority = Component(after_slashes, end_auth - after_slashes); 303c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 304c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (end_auth == spec_len) // No beginning of path found. 305c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) full_path = Component(); 306c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else // Everything starting from the slash to the end is the path. 307c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) full_path = Component(end_auth, spec_len - end_auth); 308c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 309c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Now parse those two sub-parts. 310c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseAuthority(spec, authority, &parsed->username, &parsed->password, 311c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) &parsed->host, &parsed->port); 312c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref); 313c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 314c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 315c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The main parsing function for standard URLs. Standard URLs have a scheme, 316c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// host, path, etc. 
317c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 318c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) { 319c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DCHECK(spec_len >= 0); 320c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 321c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Strip leading & trailing spaces and control characters. 322c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int begin = 0; 323c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) TrimURL(spec, &begin, &spec_len); 324c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 325c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int after_scheme; 326c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (DoExtractScheme(spec, spec_len, &parsed->scheme)) { 327c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) after_scheme = parsed->scheme.end() + 1; // Skip past the colon. 328c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 329c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Say there's no scheme when there is no colon. We could also say that 330c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // everything is the scheme. Both would produce an invalid URL, but this way 331c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // seems less wrong in more cases. 
332c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.reset(); 333c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) after_scheme = begin; 334c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 335c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseAfterScheme(spec, spec_len, after_scheme, parsed); 336c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 337c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 338c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 339c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) { 340c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DCHECK(spec_len >= 0); 341c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 342c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Get the unused parts of the URL out of the way. 343c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->username.reset(); 344c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->password.reset(); 345c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->host.reset(); 346c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->port.reset(); 347c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->path.reset(); // May use this; reset for convenience. 348c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->ref.reset(); // May use this; reset for convenience. 349c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->query.reset(); // May use this; reset for convenience. 350c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->clear_inner_parsed(); // May use this; reset for convenience. 
351c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 352c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Strip leading & trailing spaces and control characters. 353c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int begin = 0; 354c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) TrimURL(spec, &begin, &spec_len); 355c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 356c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Handle empty specs or ones that contain only whitespace or control chars. 357c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (begin == spec_len) { 358c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.reset(); 359c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 360c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 361c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 362c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int inner_start = -1; 363c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 364c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Extract the scheme. We also handle the case where there is no scheme. 365c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 366c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Offset the results since we gave ExtractScheme a substring. 
367c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.begin += begin; 368c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 369c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (parsed->scheme.end() == spec_len - 1) 370c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 371c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 372c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_start = parsed->scheme.end() + 1; 373c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 374c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No scheme found; that's not valid for filesystem URLs. 375c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.reset(); 376c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 377c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 378c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 379c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) url_parse::Component inner_scheme; 380c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const CHAR* inner_spec = &spec[inner_start]; 381c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int inner_spec_len = spec_len - inner_start; 382c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 383c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) { 384c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Offset the results since we gave ExtractScheme a substring. 
385c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_scheme.begin += inner_start; 386c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 387c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (inner_scheme.end() == spec_len - 1) 388c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 389c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 390c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No scheme found; that's not valid for filesystem URLs. 391c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // The best we can do is return "filesystem://". 392c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 393c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 394c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 395c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Parsed inner_parsed; 396c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 397c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (url_util::CompareSchemeComponent( 398c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) spec, inner_scheme, url_util::kFileScheme)) { 399c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // File URLs are special. 400c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ParseFileURL(inner_spec, inner_spec_len, &inner_parsed); 401c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else if (url_util::CompareSchemeComponent(spec, inner_scheme, 402c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) url_util::kFileSystemScheme)) { 403c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Filesystem URLs don't nest. 
404c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 405c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else if (url_util::IsStandard(spec, inner_scheme)) { 406c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // All "normal" URLs. 407c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed); 408c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 409c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 410c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 411c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 412c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // All members of inner_parsed need to be offset by inner_start. 413c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // If we had any scheme that supported nesting more than one level deep, 414c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // we'd have to recurse into the inner_parsed's inner_parsed when 415c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // adjusting by inner_start. 
416c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.scheme.begin += inner_start; 417c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.username.begin += inner_start; 418c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.password.begin += inner_start; 419c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.host.begin += inner_start; 420c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.port.begin += inner_start; 421c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.query.begin += inner_start; 422c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.ref.begin += inner_start; 423c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.path.begin += inner_start; 424c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 425c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Query and ref move from inner_parsed to parsed. 
426c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->query = inner_parsed.query; 427c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.query.reset(); 428c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->ref = inner_parsed.ref; 429c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.ref.reset(); 430c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 431c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->set_inner_parsed(inner_parsed); 432c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() || 433c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed.inner_parsed()) { 434c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 435c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 436c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 437c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // The path in inner_parsed should start with a slash, then have a filesystem 438c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // type followed by a slash. From the first slash up to but excluding the 439c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // second should be what it keeps; the rest goes to parsed. If the path ends 440c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // before the second slash, it's still pretty clear what the user meant, so 441c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // we'll let that through. 
442c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!IsURLSlash(spec[inner_parsed.path.begin])) { 443c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 444c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 445c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int inner_path_end = inner_parsed.path.begin + 1; // skip the leading slash 446c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) while (inner_path_end < spec_len && 447c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) !IsURLSlash(spec[inner_path_end])) 448c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ++inner_path_end; 449c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->path.begin = inner_path_end; 450c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int new_inner_path_length = inner_path_end - inner_parsed.path.begin; 451c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->path.len = inner_parsed.path.len - new_inner_path_length; 452c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->inner_parsed()->path.len = new_inner_path_length; 453c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 454c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 455c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Initializes a path URL which is merely a scheme followed by a path. 
Examples 456c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// include "about:foo" and "javascript:alert('bar');" 457c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 458c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) { 459c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Get the non-path and non-scheme parts of the URL out of the way, we never 460c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // use them. 461c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->username.reset(); 462c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->password.reset(); 463c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->host.reset(); 464c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->port.reset(); 46506797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon parsed->path.reset(); 466c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->query.reset(); 467c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->ref.reset(); 468c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 469c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Strip leading & trailing spaces and control characters. 470c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int begin = 0; 471c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) TrimURL(spec, &begin, &spec_len); 472c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 473c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Handle empty specs or ones that contain only whitespace or control chars. 
474c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (begin == spec_len) { 475c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.reset(); 476c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->path.reset(); 477c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 478c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 479c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 480c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Extract the scheme, with the path being everything following. We also 481c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // handle the case where there is no scheme. 482c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 483c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Offset the results since we gave ExtractScheme a substring. 484c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.begin += begin; 48506797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon begin = parsed->scheme.end() + 1; 486c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 487c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.reset(); 488c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 48906797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon 49006797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon if (begin == spec_len) 49106797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon return; 49206797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon DCHECK_LT(begin, spec_len); 49306797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon 49406797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon ParsePath(spec, 49506797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon MakeRange(begin, spec_len), 49606797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon &parsed->path, 
49706797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon &parsed->query, 49806797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon &parsed->ref); 499c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 500c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 501c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 502c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) { 503c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DCHECK(spec_len >= 0); 504c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 505c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Get the non-path and non-scheme parts of the URL out of the way, we never 506c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // use them. 507c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->username.reset(); 508c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->password.reset(); 509c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->host.reset(); 510c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->port.reset(); 511c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->ref.reset(); 512c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->query.reset(); // May use this; reset for convenience. 513c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 514c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Strip leading & trailing spaces and control characters. 
515c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int begin = 0; 516c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) TrimURL(spec, &begin, &spec_len); 517c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 518c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Handle empty specs or ones that contain only whitespace or control chars. 519c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (begin == spec_len) { 520c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.reset(); 521c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->path.reset(); 522c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 523c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 524c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 525c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int path_begin = -1; 526c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int path_end = -1; 527c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 528c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Extract the scheme, with the path being everything following. We also 529c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // handle the case where there is no scheme. 530c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 531c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Offset the results since we gave ExtractScheme a substring. 
532c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.begin += begin; 533c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 534c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (parsed->scheme.end() != spec_len - 1) { 535c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) path_begin = parsed->scheme.end() + 1; 536c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) path_end = spec_len; 537c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 538c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 539c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No scheme found, just path. 540c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->scheme.reset(); 541c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) path_begin = begin; 542c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) path_end = spec_len; 543c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 544c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 545c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Split [path_begin, path_end) into a path + query. 
546c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = path_begin; i < path_end; ++i) { 547c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (spec[i] == '?') { 548c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->query = MakeRange(i + 1, path_end); 549c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) path_end = i; 550c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) break; 551c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 552c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 553c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 554c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // For compatability with the standard URL parser, treat no path as 555c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // -1, rather than having a length of 0 556c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (path_begin == path_end) { 557c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->path.reset(); 558c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 559c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parsed->path = MakeRange(path_begin, path_end); 560c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 561c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 562c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 563c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Converts a port number in a string to an integer. We'd like to just call 564c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// sscanf but our input is not NULL-terminated, which sscanf requires. 
Instead, 565c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// we copy the digits to a small stack buffer (since we know the maximum number 566c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// of digits in a valid port number) that we can NULL terminate. 567c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 568c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int DoParsePort(const CHAR* spec, const Component& component) { 569c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Easy success case when there is no port. 570c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const int kMaxDigits = 5; 571c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!component.is_nonempty()) 572c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return PORT_UNSPECIFIED; 573c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 574c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Skip over any leading 0s. 575c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component digits_comp(component.end(), 0); 576c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = 0; i < component.len; i++) { 577c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (spec[component.begin + i] != '0') { 578c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) digits_comp = MakeRange(component.begin + i, component.end()); 579c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) break; 580c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 581c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 582c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (digits_comp.len == 0) 583c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return 0; // All digits were 0. 
584c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 585c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Verify we don't have too many digits (we'll be copying to our buffer so 586c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // we need to double-check). 587c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (digits_comp.len > kMaxDigits) 588c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return PORT_INVALID; 589c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 590c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Copy valid digits to the buffer. 591c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) char digits[kMaxDigits + 1]; // +1 for null terminator 592c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = 0; i < digits_comp.len; i++) { 593c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CHAR ch = spec[digits_comp.begin + i]; 594c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!IsPortDigit(ch)) { 595c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Invalid port digit, fail. 596c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return PORT_INVALID; 597c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 598c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) digits[i] = static_cast<char>(ch); 599c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 600c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 601c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Null-terminate the string and convert to integer. Since we guarantee 602c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // only digits, atoi's lack of error handling is OK. 
603c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) digits[digits_comp.len] = 0; 604c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int port = atoi(digits); 605c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (port > 65535) 606c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return PORT_INVALID; // Out of range. 607c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return port; 608c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 609c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 610c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 611c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoExtractFileName(const CHAR* spec, 612c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& path, 613c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* file_name) { 614c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Handle empty paths: they have no file names. 615c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!path.is_nonempty()) { 616c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) file_name->reset(); 617c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 618c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 619c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 620c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Search backwards for a parameter, which is a normally unused field in a 621c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // URL delimited by a semicolon. We parse the parameter as part of the 622c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // path, but here, we don't want to count it. The last semicolon is the 623c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // parameter. 
The path should start with a slash, so we don't need to check 624c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // the first one. 625c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int file_end = path.end(); 626c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = path.end() - 1; i > path.begin; i--) { 627c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (spec[i] == ';') { 628c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) file_end = i; 629c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) break; 630c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 631c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 632c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 633c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Now search backwards from the filename end to the previous slash 634c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // to find the beginning of the filename. 635c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = file_end - 1; i >= path.begin; i--) { 636c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (IsURLSlash(spec[i])) { 637c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // File name is everything following this character to the end 638c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *file_name = MakeRange(i + 1, file_end); 639c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 640c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 641c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 642c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 643c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No slash found, this means the input was degenerate (generally paths 644c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // will start with a slash). 
Let's call everything the file name. 645c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *file_name = MakeRange(path.begin, file_end); 646c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 647c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 648c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 649c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR> 650c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoExtractQueryKeyValue(const CHAR* spec, 651c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* query, 652c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* key, 653c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* value) { 654c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!query->is_nonempty()) 655c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; 656c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 657c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int start = query->begin; 658c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int cur = start; 659c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int end = query->end(); 660c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 661c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We assume the beginning of the input is the beginning of the "key" and we 662c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // skip to the end of it. 
663c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) key->begin = cur; 664c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) while (cur < end && spec[cur] != '&' && spec[cur] != '=') 665c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur++; 666c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) key->len = cur - key->begin; 667c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 668c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Skip the separator after the key (if any). 669c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (cur < end && spec[cur] == '=') 670c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur++; 671c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 672c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Find the value part. 673c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) value->begin = cur; 674c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) while (cur < end && spec[cur] != '&') 675c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur++; 676c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) value->len = cur - value->begin; 677c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 678c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Finally skip the next separator if any 679c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (cur < end && spec[cur] == '&') 680c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur++; 681c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 682c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Save the new query 683c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *query = url_parse::MakeRange(cur, end); 684c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return true; 685c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 
686c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 687c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} // namespace 688c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 689c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed::Parsed() : inner_parsed_(NULL) { 690c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 691c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 692c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed::Parsed(const Parsed& other) : 693c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) scheme(other.scheme), 694c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) username(other.username), 695c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) password(other.password), 696c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) host(other.host), 697c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) port(other.port), 698c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) path(other.path), 699c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) query(other.query), 700c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ref(other.ref), 701c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) inner_parsed_(NULL) { 702c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (other.inner_parsed_) 703c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) set_inner_parsed(*other.inner_parsed_); 704c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 705c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 706c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed& Parsed::operator=(const Parsed& other) { 707c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (this != &other) { 708c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) scheme = other.scheme; 709c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
username = other.username; 710c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) password = other.password; 711c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) host = other.host; 712c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) port = other.port; 713c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) path = other.path; 714c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) query = other.query; 715c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ref = other.ref; 716c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (other.inner_parsed_) 717c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) set_inner_parsed(*other.inner_parsed_); 718c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else 719c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) clear_inner_parsed(); 720c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 721c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return *this; 722c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 723c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 724c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed::~Parsed() { 725c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) delete inner_parsed_; 726c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 727c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 728c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int Parsed::Length() const { 729c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ref.is_valid()) 730c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return ref.end(); 731c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return CountCharactersBefore(REF, false); 732c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 733c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
734c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int Parsed::CountCharactersBefore(ComponentType type, 735c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool include_delimiter) const { 736c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type == SCHEME) 737c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return scheme.begin; 738c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 739c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // There will be some characters after the scheme like "://" and we don't 740c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // know how many. Search forwards for the next thing until we find one. 741c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int cur = 0; 742c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (scheme.is_valid()) 743c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme. 744c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 745c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (username.is_valid()) { 746c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type <= USERNAME) 747c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return username.begin; 748c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur = username.end() + 1; // Advance over the '@' or ':' at the end. 
749c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 750c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 751c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (password.is_valid()) { 752c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type <= PASSWORD) 753c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return password.begin; 754c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur = password.end() + 1; // Advance over the '@' at the end. 755c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 756c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 757c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (host.is_valid()) { 758c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type <= HOST) 759c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return host.begin; 760c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur = host.end(); 761c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 762c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 763c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (port.is_valid()) { 764c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type < PORT || (type == PORT && include_delimiter)) 765c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return port.begin - 1; // Back over delimiter. 766c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type == PORT) 767c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return port.begin; // Don't want delimiter counted. 
768c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur = port.end(); 769c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 770c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 771c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (path.is_valid()) { 772c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type <= PATH) 773c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return path.begin; 774c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur = path.end(); 775c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 776c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 777c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (query.is_valid()) { 778c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type < QUERY || (type == QUERY && include_delimiter)) 779c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return query.begin - 1; // Back over delimiter. 780c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type == QUERY) 781c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return query.begin; // Don't want delimiter counted. 782c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) cur = query.end(); 783c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 784c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 785c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ref.is_valid()) { 786c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type == REF && !include_delimiter) 787c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return ref.begin; // Back over delimiter. 
788c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 789c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // When there is a ref and we get here, the component we wanted was before 790c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // this and not found, so we always know the beginning of the ref is right. 791c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return ref.begin - 1; // Don't want delimiter counted. 792c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 793c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 794c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return cur; 795c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 796c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 797de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan DixonComponent Parsed::GetContent() const { 798de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon const int begin = CountCharactersBefore(USERNAME, false); 799de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon const int len = Length() - begin; 800de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon // For compatability with the standard URL parser, we treat no content as 801de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon // -1, rather than having a length of 0 (we normally wouldn't care so 802de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon // much for these non-standard URLs). 803de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon return len ? 
Component(begin, len) : Component(); 804de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon} 805de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon 806c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ExtractScheme(const char* url, int url_len, Component* scheme) { 807c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoExtractScheme(url, url_len, scheme); 808c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 809c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8107d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool ExtractScheme(const base::char16* url, int url_len, Component* scheme) { 811c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoExtractScheme(url, url_len, scheme); 812c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 813c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 814c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This handles everything that may be an authority terminator, including 815c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// backslash. For special backslash handling see DoParseAfterScheme. 8167d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool IsAuthorityTerminator(base::char16 ch) { 817c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return IsURLSlash(ch) || ch == '?' 
|| ch == '#'; 818c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 819c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 820c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ExtractFileName(const char* url, 821c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& path, 822c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* file_name) { 823c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoExtractFileName(url, path, file_name); 824c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 825c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8267d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ExtractFileName(const base::char16* url, 827c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& path, 828c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* file_name) { 829c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoExtractFileName(url, path, file_name); 830c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 831c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 832c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ExtractQueryKeyValue(const char* url, 833c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* query, 834c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* key, 835c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* value) { 836c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoExtractQueryKeyValue(url, query, key, value); 837c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 838c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8397d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool ExtractQueryKeyValue(const base::char16* url, 840c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* query, 
841c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* key, 842c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* value) { 843c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoExtractQueryKeyValue(url, query, key, value); 844c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 845c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 846c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseAuthority(const char* spec, 847c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& auth, 848c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* username, 849c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* password, 850c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* hostname, 851c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* port_num) { 852c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseAuthority(spec, auth, username, password, hostname, port_num); 853c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 854c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8557d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseAuthority(const base::char16* spec, 856c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& auth, 857c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* username, 858c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* password, 859c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* hostname, 860c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* port_num) { 861c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseAuthority(spec, auth, username, password, hostname, port_num); 862c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 
863c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 864c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int ParsePort(const char* url, const Component& port) { 865c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoParsePort(url, port); 866c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 867c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8687d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)int ParsePort(const base::char16* url, const Component& port) { 869c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoParsePort(url, port); 870c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 871c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 872c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseStandardURL(const char* url, int url_len, Parsed* parsed) { 873c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseStandardURL(url, url_len, parsed); 874c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 875c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8767d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) { 877c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseStandardURL(url, url_len, parsed); 878c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 879c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 880c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParsePathURL(const char* url, int url_len, Parsed* parsed) { 881c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParsePathURL(url, url_len, parsed); 882c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 883c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8847d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParsePathURL(const base::char16* url, int 
url_len, Parsed* parsed) { 885c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParsePathURL(url, url_len, parsed); 886c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 887c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 888c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) { 889c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseFileSystemURL(url, url_len, parsed); 890c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 891c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 8927d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed) { 893c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseFileSystemURL(url, url_len, parsed); 894c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 895c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 896c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) { 897c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseMailtoURL(url, url_len, parsed); 898c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 899c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 9007d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed) { 901c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseMailtoURL(url, url_len, parsed); 902c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 903c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 904c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParsePathInternal(const char* spec, 905c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& path, 
906c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* filepath, 907c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* query, 908c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* ref) { 909c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ParsePath(spec, path, filepath, query, ref); 910c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 911c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 9127d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParsePathInternal(const base::char16* spec, 913c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Component& path, 914c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* filepath, 915c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* query, 916c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Component* ref) { 917c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ParsePath(spec, path, filepath, query, ref); 918c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 919c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 920c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseAfterScheme(const char* spec, 921c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int spec_len, 922c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int after_scheme, 923c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Parsed* parsed) { 924c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseAfterScheme(spec, spec_len, after_scheme, parsed); 925c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 926c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 9277d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseAfterScheme(const base::char16* spec, 928c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int spec_len, 
929c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int after_scheme, 930c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Parsed* parsed) { 931c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) DoParseAfterScheme(spec, spec_len, after_scheme, parsed); 932c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 933c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 934c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} // namespace url_parse 935