1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)/* Based on nsURLParsers.cc from Mozilla
2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * -------------------------------------
3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * The contents of this file are subject to the Mozilla Public License Version
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * 1.1 (the "License"); you may not use this file except in compliance with
5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * the License. You may obtain a copy of the License at
6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * http://www.mozilla.org/MPL/
7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * Software distributed under the License is distributed on an "AS IS" basis,
9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * for the specific language governing rights and limitations under the
11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * License.
12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *
13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * The Original Code is mozilla.org code.
14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *
15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * The Initial Developer of the Original Code is
16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * Netscape Communications Corporation.
17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * Portions created by the Initial Developer are Copyright (C) 1998
18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * the Initial Developer. All Rights Reserved.
19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *
20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * Contributor(s):
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *   Darin Fisher (original author)
22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *
23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * Alternatively, the contents of this file may be used under the terms of
24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * either the GNU General Public License Version 2 or later (the "GPL"), or
25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * in which case the provisions of the GPL or the LGPL are applicable instead
27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * of those above. If you wish to allow use of your version of this file only
28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * under the terms of either the GPL or the LGPL, and not to allow others to
29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * use your version of this file under the terms of the MPL, indicate your
30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * decision by deleting the provisions above and replace them with the notice
31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * and other provisions required by the GPL or the LGPL. If you do not delete
32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * the provisions above, a recipient may use your version of this file under
33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * the terms of any one of the MPL, the GPL or the LGPL.
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *
35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * ***** END LICENSE BLOCK ***** */
36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
373d4dfb6f11fb4e934d658743a8efc26d5490fdb0Ben Murdoch#include "url/third_party/mozilla/url_parse.h"
38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include <stdlib.h>
40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/logging.h"
42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_parse_internal.h"
43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_util.h"
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_util_internal.h"
45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace url_parse {
47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace {
49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Returns true if the given character is a valid digit to use in a port.
517d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)inline bool IsPortDigit(base::char16 ch) {
52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return ch >= '0' && ch <= '9';
53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
// Scans |spec| forward from |start_offset| and returns the index of the first
// character for which IsAuthorityTerminator() is true. When no such character
// exists before |spec_len|, |spec_len| itself is returned.
template<typename CHAR>
int FindNextAuthorityTerminator(const CHAR* spec,
                                int start_offset,
                                int spec_len) {
  int pos = start_offset;
  while (pos < spec_len && !IsAuthorityTerminator(spec[pos]))
    ++pos;

  // Either we stopped on a terminator, or we ran off the end (not found).
  return pos < spec_len ? pos : spec_len;
}
68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseUserInfo(const CHAR* spec,
71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                   const Component& user,
72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                   Component* username,
73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                   Component* password) {
74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Find the first colon in the user section, which separates the username and
75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // password.
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int colon_offset = 0;
77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')
78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    colon_offset++;
79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (colon_offset < user.len) {
81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Found separator: <username>:<password>
82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *username = Component(user.begin, colon_offset);
83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *password = MakeRange(user.begin + colon_offset + 1,
84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                          user.begin + user.len);
85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // No separator, treat everything as the username
87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *username = user;
88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *password = Component();
89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseServerInfo(const CHAR* spec,
94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     const Component& serverinfo,
95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     Component* hostname,
96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     Component* port_num) {
97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (serverinfo.len == 0) {
98c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // No server info, host name is empty.
99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    hostname->reset();
100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    port_num->reset();
101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // If the host starts with a left-bracket, assume the entire host is an
105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // IPv6 literal.  Otherwise, assume none of the host is an IPv6 literal.
106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // This assumption will be overridden if we find a right-bracket.
107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  //
108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Our IPv6 address canonicalization code requires both brackets to exist,
109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // but the ability to locate an incomplete address can still be useful.
110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;
111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int colon = -1;
112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Find the last right-bracket, and the last colon.
114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = serverinfo.begin; i < serverinfo.end(); i++) {
115c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    switch (spec[i]) {
116c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      case ']':
117c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        ipv6_terminator = i;
118c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        break;
119c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      case ':':
120c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        colon = i;
121c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        break;
122c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
123c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
124c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
125c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (colon > ipv6_terminator) {
126c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Found a port number: <hostname>:<port>
127c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *hostname = MakeRange(serverinfo.begin, colon);
128c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (hostname->len == 0)
129c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      hostname->reset();
130c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *port_num = MakeRange(colon + 1, serverinfo.end());
131c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
132c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // No port: <hostname>
133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *hostname = serverinfo;
134c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    port_num->reset();
135c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
136c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
137c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
138c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Given an already-identified auth section, breaks it into its consituent
139c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// parts. The port number will be parsed and the resulting integer will be
140c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// filled into the given *port variable, or -1 if there is no port number or it
141c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// is invalid.
142c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
143c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseAuthority(const CHAR* spec,
144c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      const Component& auth,
145c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      Component* username,
146c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      Component* password,
147c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      Component* hostname,
148c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      Component* port_num) {
149c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DCHECK(auth.is_valid()) << "We should always get an authority";
150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (auth.len == 0) {
151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    username->reset();
152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    password->reset();
153c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    hostname->reset();
154c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    port_num->reset();
155c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
157c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
158c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Search backwards for @, which is the separator between the user info and
159c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // the server info.
160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int i = auth.begin + auth.len - 1;
161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (i > auth.begin && spec[i] != '@')
162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    i--;
163c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
164c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (spec[i] == '@') {
165c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Found user info: <user-info>@<server-info>
166c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ParseUserInfo(spec, Component(auth.begin, i - auth.begin),
167c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                  username, password);
168c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len),
169c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    hostname, port_num);
170c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
171c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // No user info, everything is server info.
172c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    username->reset();
173c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    password->reset();
174c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ParseServerInfo(spec, auth, hostname, port_num);
175c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
176c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
177c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
178c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
179c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParsePath(const CHAR* spec,
180c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)               const Component& path,
181c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)               Component* filepath,
182c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)               Component* query,
183c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)               Component* ref) {
184c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
185c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
186c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Special case when there is no path.
187c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (path.len == -1) {
188c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    filepath->reset();
189c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    query->reset();
190c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ref->reset();
191c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
192c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
193c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DCHECK(path.len > 0) << "We should never have 0 length paths";
194c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
195c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Search for first occurrence of either ? or #.
196c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int path_end = path.begin + path.len;
197c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
198c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int query_separator = -1;  // Index of the '?'
199c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int ref_separator = -1;    // Index of the '#'
200c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = path.begin; i < path_end; i++) {
201c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    switch (spec[i]) {
202c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      case '?':
203c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        // Only match the query string if it precedes the reference fragment
204c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        // and when we haven't found one already.
205c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        if (ref_separator < 0 && query_separator < 0)
206c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          query_separator = i;
207c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        break;
208c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      case '#':
209c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        // Record the first # sign only.
210c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        if (ref_separator < 0)
211c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          ref_separator = i;
212c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        break;
213c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
214c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
215c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Markers pointing to the character after each of these corresponding
217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // components. The code below words from the end back to the beginning,
218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // and will update these indices as it finds components that exist.
219c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int file_end, query_end;
220c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
221c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Ref fragment: from the # to the end of the path.
222c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (ref_separator >= 0) {
223c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    file_end = query_end = ref_separator;
224c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *ref = MakeRange(ref_separator + 1, path_end);
225c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
226c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    file_end = query_end = path_end;
227c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ref->reset();
228c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
229c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
230c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Query fragment: everything from the ? to the next boundary (either the end
231c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // of the path or the ref fragment).
232c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (query_separator >= 0) {
233c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    file_end = query_separator;
234c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *query = MakeRange(query_separator + 1, query_end);
235c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
236c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    query->reset();
237c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
238c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
239c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // File path: treat an empty file path as no file path.
240c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (file_end != path.begin)
241c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    *filepath = MakeRange(path.begin, file_end);
242c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  else
243c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    filepath->reset();
244c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
245c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
246c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
247c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoExtractScheme(const CHAR* url,
248c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     int url_len,
249c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     Component* scheme) {
250c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Skip leading whitespace and control characters.
251c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int begin = 0;
252c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (begin < url_len && ShouldTrimFromURL(url[begin]))
253c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    begin++;
254c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (begin == url_len)
255c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return false;  // Input is empty or all whitespace.
256c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
257c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Find the first colon character.
258c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = begin; i < url_len; i++) {
259c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (url[i] == ':') {
260c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      *scheme = MakeRange(begin, i);
261c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return true;
262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return false;  // No colon found: no scheme
265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Fills in all members of the Parsed structure except for the scheme.
268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)//
269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// |spec| is the full spec being parsed, of length |spec_len|.
270c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// |after_scheme| is the character immediately following the scheme (after the
271c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)//   colon) where we'll begin parsing.
272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)//
273c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Compatability data points. I list "host", "path" extracted:
274c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Input                IE6             Firefox                Us
275c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// -----                --------------  --------------         --------------
276c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http://foo.com/      "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
277c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:foo.com/        "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
278c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:/foo.com/       fail(*)         "foo.com", "/"         "foo.com", "/"
279c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:\foo.com/       fail(*)         "\foo.com", "/"(fail)  "foo.com", "/"
280c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// http:////foo.com/    "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
281c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)//
282c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// (*) Interestingly, although IE fails to load these URLs, its history
283c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// canonicalizer handles them, meaning if you've been to the corresponding
284c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// "http://foo.com/" link, it will be colored.
285c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template <typename CHAR>
286c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseAfterScheme(const CHAR* spec,
287c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                        int spec_len,
288c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                        int after_scheme,
289c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                        Parsed* parsed) {
290c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
291c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int after_slashes = after_scheme + num_slashes;
292c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
293c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // First split into two main parts, the authority (username, password, host,
294c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // and port) and the full path (path, query, and reference).
295c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Component authority;
296c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Component full_path;
297c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
298c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Found "//<some data>", looks like an authority section. Treat everything
299c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // from there to the next slash (or end of spec) to be the authority. Note
300c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // that we ignore the number of slashes and treat it as the authority.
301c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
302c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  authority = Component(after_slashes, end_auth - after_slashes);
303c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
304c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (end_auth == spec_len)  // No beginning of path found.
305c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    full_path = Component();
306c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  else  // Everything starting from the slash to the end is the path.
307c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    full_path = Component(end_auth, spec_len - end_auth);
308c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
309c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Now parse those two sub-parts.
310c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
311c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                   &parsed->host, &parsed->port);
312c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
313c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
314c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
315c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// The main parsing function for standard URLs. Standard URLs have a scheme,
316c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// host, path, etc.
317c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
318c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {
319c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DCHECK(spec_len >= 0);
320c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
321c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Strip leading & trailing spaces and control characters.
322c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int begin = 0;
323c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  TrimURL(spec, &begin, &spec_len);
324c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
325c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int after_scheme;
326c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
327c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    after_scheme = parsed->scheme.end() + 1;  // Skip past the colon.
328c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
329c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Say there's no scheme when there is no colon. We could also say that
330c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // everything is the scheme. Both would produce an invalid URL, but this way
331c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // seems less wrong in more cases.
332c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.reset();
333c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    after_scheme = begin;
334c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
335c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
336c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
337c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
338c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
339c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {
340c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DCHECK(spec_len >= 0);
341c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
342c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Get the unused parts of the URL out of the way.
343c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->username.reset();
344c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->password.reset();
345c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->host.reset();
346c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->port.reset();
347c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->path.reset();   // May use this; reset for convenience.
348c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->ref.reset();    // May use this; reset for convenience.
349c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->query.reset();  // May use this; reset for convenience.
350c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->clear_inner_parsed();  // May use this; reset for convenience.
351c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
352c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Strip leading & trailing spaces and control characters.
353c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int begin = 0;
354c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  TrimURL(spec, &begin, &spec_len);
355c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
356c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Handle empty specs or ones that contain only whitespace or control chars.
357c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (begin == spec_len) {
358c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.reset();
359c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
360c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
361c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
362c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int inner_start = -1;
363c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
364c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Extract the scheme.  We also handle the case where there is no scheme.
365c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
366c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Offset the results since we gave ExtractScheme a substring.
367c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.begin += begin;
368c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
369c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (parsed->scheme.end() == spec_len - 1)
370c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return;
371c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
372c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    inner_start = parsed->scheme.end() + 1;
373c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
374c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // No scheme found; that's not valid for filesystem URLs.
375c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.reset();
376c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
377c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
378c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
379c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  url_parse::Component inner_scheme;
380c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  const CHAR* inner_spec = &spec[inner_start];
381c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int inner_spec_len = spec_len - inner_start;
382c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
383c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {
384c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Offset the results since we gave ExtractScheme a substring.
385c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    inner_scheme.begin += inner_start;
386c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
387c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (inner_scheme.end() == spec_len - 1)
388c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return;
389c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
390c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // No scheme found; that's not valid for filesystem URLs.
391c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // The best we can do is return "filesystem://".
392c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
393c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
394c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
395c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Parsed inner_parsed;
396c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
397c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (url_util::CompareSchemeComponent(
398c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      spec, inner_scheme, url_util::kFileScheme)) {
399c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // File URLs are special.
400c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);
401c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else if (url_util::CompareSchemeComponent(spec, inner_scheme,
402c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      url_util::kFileSystemScheme)) {
403c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Filesystem URLs don't nest.
404c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
405c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else if (url_util::IsStandard(spec, inner_scheme)) {
406c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // All "normal" URLs.
407c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);
408c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
409c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
410c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
411c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
412c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // All members of inner_parsed need to be offset by inner_start.
413c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // If we had any scheme that supported nesting more than one level deep,
414c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // we'd have to recurse into the inner_parsed's inner_parsed when
415c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // adjusting by inner_start.
416c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.scheme.begin += inner_start;
417c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.username.begin += inner_start;
418c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.password.begin += inner_start;
419c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.host.begin += inner_start;
420c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.port.begin += inner_start;
421c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.query.begin += inner_start;
422c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.ref.begin += inner_start;
423c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.path.begin += inner_start;
424c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
425c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Query and ref move from inner_parsed to parsed.
426c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->query = inner_parsed.query;
427c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.query.reset();
428c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->ref = inner_parsed.ref;
429c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  inner_parsed.ref.reset();
430c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
431c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->set_inner_parsed(inner_parsed);
432c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() ||
433c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      inner_parsed.inner_parsed()) {
434c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
435c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
436c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
437c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // The path in inner_parsed should start with a slash, then have a filesystem
438c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // type followed by a slash.  From the first slash up to but excluding the
439c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // second should be what it keeps; the rest goes to parsed.  If the path ends
440c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // before the second slash, it's still pretty clear what the user meant, so
441c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // we'll let that through.
442c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!IsURLSlash(spec[inner_parsed.path.begin])) {
443c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
444c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
445c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int inner_path_end = inner_parsed.path.begin + 1;  // skip the leading slash
446c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (inner_path_end < spec_len &&
447c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      !IsURLSlash(spec[inner_path_end]))
448c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ++inner_path_end;
449c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->path.begin = inner_path_end;
450c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
451c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->path.len = inner_parsed.path.len - new_inner_path_length;
452c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->inner_parsed()->path.len = new_inner_path_length;
453c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
454c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
455c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Initializes a path URL which is merely a scheme followed by a path. Examples
456c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// include "about:foo" and "javascript:alert('bar');"
457c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
458c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) {
459c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Get the non-path and non-scheme parts of the URL out of the way, we never
460c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // use them.
461c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->username.reset();
462c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->password.reset();
463c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->host.reset();
464c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->port.reset();
46506797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon  parsed->path.reset();
466c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->query.reset();
467c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->ref.reset();
468c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
469c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Strip leading & trailing spaces and control characters.
470c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int begin = 0;
471c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  TrimURL(spec, &begin, &spec_len);
472c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
473c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Handle empty specs or ones that contain only whitespace or control chars.
474c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (begin == spec_len) {
475c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.reset();
476c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->path.reset();
477c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
478c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
479c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
480c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Extract the scheme, with the path being everything following. We also
481c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // handle the case where there is no scheme.
482c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
483c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Offset the results since we gave ExtractScheme a substring.
484c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.begin += begin;
48506797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon    begin = parsed->scheme.end() + 1;
486c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
487c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.reset();
488c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
48906797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon
49006797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon  if (begin == spec_len)
49106797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon    return;
49206797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon  DCHECK_LT(begin, spec_len);
49306797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon
49406797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon  ParsePath(spec,
49506797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon            MakeRange(begin, spec_len),
49606797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon            &parsed->path,
49706797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon            &parsed->query,
49806797e4204bbdcbd3b7f455e312b54c656b4f6b2Jonathan Dixon            &parsed->ref);
499c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
500c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
501c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
502c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {
503c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DCHECK(spec_len >= 0);
504c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
505c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Get the non-path and non-scheme parts of the URL out of the way, we never
506c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // use them.
507c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->username.reset();
508c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->password.reset();
509c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->host.reset();
510c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->port.reset();
511c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->ref.reset();
512c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  parsed->query.reset();  // May use this; reset for convenience.
513c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
514c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Strip leading & trailing spaces and control characters.
515c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int begin = 0;
516c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  TrimURL(spec, &begin, &spec_len);
517c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
518c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Handle empty specs or ones that contain only whitespace or control chars.
519c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (begin == spec_len) {
520c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.reset();
521c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->path.reset();
522c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
523c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
524c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
525c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int path_begin = -1;
526c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int path_end = -1;
527c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
528c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Extract the scheme, with the path being everything following. We also
529c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // handle the case where there is no scheme.
530c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
531c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Offset the results since we gave ExtractScheme a substring.
532c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.begin += begin;
533c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
534c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (parsed->scheme.end() != spec_len - 1) {
535c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      path_begin = parsed->scheme.end() + 1;
536c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      path_end = spec_len;
537c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
538c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
539c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // No scheme found, just path.
540c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->scheme.reset();
541c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    path_begin = begin;
542c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    path_end = spec_len;
543c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
544c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
545c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Split [path_begin, path_end) into a path + query.
546c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = path_begin; i < path_end; ++i) {
547c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (spec[i] == '?') {
548c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      parsed->query = MakeRange(i + 1, path_end);
549c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      path_end = i;
550c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      break;
551c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
552c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
553c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
554c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // For compatability with the standard URL parser, treat no path as
555c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // -1, rather than having a length of 0
556c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (path_begin == path_end) {
557c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->path.reset();
558c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  } else {
559c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    parsed->path = MakeRange(path_begin, path_end);
560c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
561c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
562c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
563c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Converts a port number in a string to an integer. We'd like to just call
564c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
565c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// we copy the digits to a small stack buffer (since we know the maximum number
566c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// of digits in a valid port number) that we can NULL terminate.
567c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
568c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int DoParsePort(const CHAR* spec, const Component& component) {
569c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Easy success case when there is no port.
570c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  const int kMaxDigits = 5;
571c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!component.is_nonempty())
572c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return PORT_UNSPECIFIED;
573c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
574c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Skip over any leading 0s.
575c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  Component digits_comp(component.end(), 0);
576c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = 0; i < component.len; i++) {
577c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (spec[component.begin + i] != '0') {
578c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      digits_comp = MakeRange(component.begin + i, component.end());
579c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      break;
580c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
581c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
582c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (digits_comp.len == 0)
583c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return 0;  // All digits were 0.
584c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
585c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Verify we don't have too many digits (we'll be copying to our buffer so
586c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // we need to double-check).
587c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (digits_comp.len > kMaxDigits)
588c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return PORT_INVALID;
589c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
590c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Copy valid digits to the buffer.
591c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  char digits[kMaxDigits + 1];  // +1 for null terminator
592c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = 0; i < digits_comp.len; i++) {
593c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    CHAR ch = spec[digits_comp.begin + i];
594c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (!IsPortDigit(ch)) {
595c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      // Invalid port digit, fail.
596c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return PORT_INVALID;
597c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
598c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    digits[i] = static_cast<char>(ch);
599c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
600c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
601c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Null-terminate the string and convert to integer. Since we guarantee
602c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // only digits, atoi's lack of error handling is OK.
603c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  digits[digits_comp.len] = 0;
604c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int port = atoi(digits);
605c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (port > 65535)
606c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return PORT_INVALID;  // Out of range.
607c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return port;
608c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
609c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
610c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
611c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void DoExtractFileName(const CHAR* spec,
612c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       const Component& path,
613c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       Component* file_name) {
614c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Handle empty paths: they have no file names.
615c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!path.is_nonempty()) {
616c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    file_name->reset();
617c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return;
618c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
619c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
620c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Search backwards for a parameter, which is a normally unused field in a
621c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // URL delimited by a semicolon. We parse the parameter as part of the
622c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // path, but here, we don't want to count it. The last semicolon is the
623c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // parameter. The path should start with a slash, so we don't need to check
624c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // the first one.
625c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int file_end = path.end();
626c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = path.end() - 1; i > path.begin; i--) {
627c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (spec[i] == ';') {
628c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      file_end = i;
629c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      break;
630c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
631c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
632c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
633c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Now search backwards from the filename end to the previous slash
634c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // to find the beginning of the filename.
635c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (int i = file_end - 1; i >= path.begin; i--) {
636c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (IsURLSlash(spec[i])) {
637c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      // File name is everything following this character to the end
638c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      *file_name = MakeRange(i + 1, file_end);
639c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return;
640c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
641c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
642c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
643c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // No slash found, this means the input was degenerate (generally paths
644c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // will start with a slash). Let's call everything the file name.
645c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  *file_name = MakeRange(path.begin, file_end);
646c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return;
647c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
648c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
649c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)template<typename CHAR>
650c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoExtractQueryKeyValue(const CHAR* spec,
651c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                            Component* query,
652c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                            Component* key,
653c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                            Component* value) {
654c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!query->is_nonempty())
655c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return false;
656c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
657c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int start = query->begin;
658c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int cur = start;
659c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int end = query->end();
660c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
661c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // We assume the beginning of the input is the beginning of the "key" and we
662c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // skip to the end of it.
663c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  key->begin = cur;
664c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (cur < end && spec[cur] != '&' && spec[cur] != '=')
665c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur++;
666c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  key->len = cur - key->begin;
667c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
668c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Skip the separator after the key (if any).
669c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (cur < end && spec[cur] == '=')
670c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur++;
671c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
672c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Find the value part.
673c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  value->begin = cur;
674c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (cur < end && spec[cur] != '&')
675c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur++;
676c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  value->len = cur - value->begin;
677c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
678c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Finally skip the next separator if any
679c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (cur < end && spec[cur] == '&')
680c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur++;
681c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
682c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Save the new query
683c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  *query = url_parse::MakeRange(cur, end);
684c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return true;
685c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
686c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
687c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}  // namespace
688c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
689c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed::Parsed() : inner_parsed_(NULL) {
690c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
691c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
692c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed::Parsed(const Parsed& other) :
693c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    scheme(other.scheme),
694c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    username(other.username),
695c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    password(other.password),
696c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    host(other.host),
697c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    port(other.port),
698c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    path(other.path),
699c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    query(other.query),
700c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ref(other.ref),
701c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    inner_parsed_(NULL) {
702c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (other.inner_parsed_)
703c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    set_inner_parsed(*other.inner_parsed_);
704c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
705c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
706c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed& Parsed::operator=(const Parsed& other) {
707c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (this != &other) {
708c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    scheme = other.scheme;
709c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    username = other.username;
710c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    password = other.password;
711c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    host = other.host;
712c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    port = other.port;
713c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    path = other.path;
714c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    query = other.query;
715c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    ref = other.ref;
716c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (other.inner_parsed_)
717c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      set_inner_parsed(*other.inner_parsed_);
718c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    else
719c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      clear_inner_parsed();
720c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
721c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return *this;
722c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
723c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
724c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)Parsed::~Parsed() {
725c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  delete inner_parsed_;
726c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
727c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
728c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int Parsed::Length() const {
729c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (ref.is_valid())
730c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return ref.end();
731c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return CountCharactersBefore(REF, false);
732c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
733c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
734c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int Parsed::CountCharactersBefore(ComponentType type,
735c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                                  bool include_delimiter) const {
736c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (type == SCHEME)
737c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return scheme.begin;
738c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
739c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // There will be some characters after the scheme like "://" and we don't
740c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // know how many. Search forwards for the next thing until we find one.
741c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int cur = 0;
742c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (scheme.is_valid())
743c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur = scheme.end() + 1;  // Advance over the ':' at the end of the scheme.
744c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
745c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (username.is_valid()) {
746c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type <= USERNAME)
747c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return username.begin;
748c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur = username.end() + 1;  // Advance over the '@' or ':' at the end.
749c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
750c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
751c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (password.is_valid()) {
752c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type <= PASSWORD)
753c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return password.begin;
754c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur = password.end() + 1;  // Advance over the '@' at the end.
755c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
756c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
757c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (host.is_valid()) {
758c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type <= HOST)
759c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return host.begin;
760c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur = host.end();
761c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
762c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
763c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (port.is_valid()) {
764c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type < PORT || (type == PORT && include_delimiter))
765c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return port.begin - 1;  // Back over delimiter.
766c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type == PORT)
767c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return port.begin;  // Don't want delimiter counted.
768c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur = port.end();
769c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
770c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
771c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (path.is_valid()) {
772c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type <= PATH)
773c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return path.begin;
774c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur = path.end();
775c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
776c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
777c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (query.is_valid()) {
778c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type < QUERY || (type == QUERY && include_delimiter))
779c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return query.begin - 1;  // Back over delimiter.
780c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type == QUERY)
781c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return query.begin;  // Don't want delimiter counted.
782c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    cur = query.end();
783c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
784c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
785c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (ref.is_valid()) {
786c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (type == REF && !include_delimiter)
787c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return ref.begin;  // Back over delimiter.
788c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
789c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // When there is a ref and we get here, the component we wanted was before
790c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // this and not found, so we always know the beginning of the ref is right.
791c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return ref.begin - 1;  // Don't want delimiter counted.
792c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
793c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
794c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return cur;
795c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
796c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
797de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan DixonComponent Parsed::GetContent() const {
798de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon  const int begin = CountCharactersBefore(USERNAME, false);
799de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon  const int len = Length() - begin;
800de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon  // For compatability with the standard URL parser, we treat no content as
801de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon  // -1, rather than having a length of 0 (we normally wouldn't care so
802de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon  // much for these non-standard URLs).
803de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon  return len ? Component(begin, len) : Component();
804de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon}
805de1bd3ebc0808574e846315a068b6935d3e72d5fJonathan Dixon
806c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ExtractScheme(const char* url, int url_len, Component* scheme) {
807c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return DoExtractScheme(url, url_len, scheme);
808c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
809c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8107d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool ExtractScheme(const base::char16* url, int url_len, Component* scheme) {
811c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return DoExtractScheme(url, url_len, scheme);
812c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
813c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
814c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// This handles everything that may be an authority terminator, including
815c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// backslash. For special backslash handling see DoParseAfterScheme.
8167d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool IsAuthorityTerminator(base::char16 ch) {
817c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return IsURLSlash(ch) || ch == '?' || ch == '#';
818c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
819c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
820c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ExtractFileName(const char* url,
821c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     const Component& path,
822c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     Component* file_name) {
823c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoExtractFileName(url, path, file_name);
824c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
825c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8267d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ExtractFileName(const base::char16* url,
827c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     const Component& path,
828c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                     Component* file_name) {
829c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoExtractFileName(url, path, file_name);
830c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
831c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
832c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ExtractQueryKeyValue(const char* url,
833c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                          Component* query,
834c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                          Component* key,
835c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                          Component* value) {
836c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return DoExtractQueryKeyValue(url, query, key, value);
837c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
838c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8397d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool ExtractQueryKeyValue(const base::char16* url,
840c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                          Component* query,
841c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                          Component* key,
842c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                          Component* value) {
843c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return DoExtractQueryKeyValue(url, query, key, value);
844c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
845c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
846c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseAuthority(const char* spec,
847c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    const Component& auth,
848c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* username,
849c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* password,
850c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* hostname,
851c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* port_num) {
852c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseAuthority(spec, auth, username, password, hostname, port_num);
853c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
854c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8557d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseAuthority(const base::char16* spec,
856c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    const Component& auth,
857c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* username,
858c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* password,
859c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* hostname,
860c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                    Component* port_num) {
861c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseAuthority(spec, auth, username, password, hostname, port_num);
862c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
863c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
864c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int ParsePort(const char* url, const Component& port) {
865c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return DoParsePort(url, port);
866c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
867c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8687d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)int ParsePort(const base::char16* url, const Component& port) {
869c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return DoParsePort(url, port);
870c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
871c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
872c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
873c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseStandardURL(url, url_len, parsed);
874c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
875c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8767d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) {
877c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseStandardURL(url, url_len, parsed);
878c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
879c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
880c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParsePathURL(const char* url, int url_len, Parsed* parsed) {
881c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParsePathURL(url, url_len, parsed);
882c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
883c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8847d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParsePathURL(const base::char16* url, int url_len, Parsed* parsed) {
885c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParsePathURL(url, url_len, parsed);
886c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
887c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
888c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
889c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseFileSystemURL(url, url_len, parsed);
890c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
891c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
8927d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed) {
893c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseFileSystemURL(url, url_len, parsed);
894c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
895c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
896c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
897c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseMailtoURL(url, url_len, parsed);
898c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
899c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
9007d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed) {
901c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseMailtoURL(url, url_len, parsed);
902c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
903c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
904c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParsePathInternal(const char* spec,
905c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       const Component& path,
906c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       Component* filepath,
907c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       Component* query,
908c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       Component* ref) {
909c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  ParsePath(spec, path, filepath, query, ref);
910c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
911c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
9127d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParsePathInternal(const base::char16* spec,
913c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       const Component& path,
914c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       Component* filepath,
915c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       Component* query,
916c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                       Component* ref) {
917c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  ParsePath(spec, path, filepath, query, ref);
918c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
919c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
920c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)void ParseAfterScheme(const char* spec,
921c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      int spec_len,
922c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      int after_scheme,
923c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      Parsed* parsed) {
924c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
925c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
926c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
9277d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)void ParseAfterScheme(const base::char16* spec,
928c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      int spec_len,
929c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      int after_scheme,
930c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)                      Parsed* parsed) {
931c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
932c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
933c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
934c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}  // namespace url_parse
935