1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott/* Based on nsURLParsers.cc from Mozilla
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * -------------------------------------
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The contents of this file are subject to the Mozilla Public License Version
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * 1.1 (the "License"); you may not use this file except in compliance with
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the License. You may obtain a copy of the License at
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * http://www.mozilla.org/MPL/
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Software distributed under the License is distributed on an "AS IS" basis,
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * for the specific language governing rights and limitations under the
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * License.
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The Original Code is mozilla.org code.
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * The Initial Developer of the Original Code is
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Netscape Communications Corporation.
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Portions created by the Initial Developer are Copyright (C) 1998
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the Initial Developer. All Rights Reserved.
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Contributor(s):
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *   Darin Fisher (original author)
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * Alternatively, the contents of this file may be used under the terms of
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * either the GNU General Public License Version 2 or later (the "GPL"), or
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * in which case the provisions of the GPL or the LGPL are applicable instead
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * of those above. If you wish to allow use of your version of this file only
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * under the terms of either the GPL or the LGPL, and not to allow others to
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * use your version of this file under the terms of the MPL, indicate your
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * decision by deleting the provisions above and replace them with the notice
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * and other provisions required by the GPL or the LGPL. If you do not delete
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the provisions above, a recipient may use your version of this file under
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * the terms of any one of the MPL, the GPL or the LGPL.
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * ***** END LICENSE BLOCK ***** */
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_parse.h"
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stdlib.h>
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/logging.h"
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/url_parse_internal.h"
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace url_parse {
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace {
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Returns true if the given character is a valid digit to use in a port.
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottinline bool IsPortDigit(char16 ch) {
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return ch >= '0' && ch <= '9';
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Scans |spec| forward from |start_offset| and returns the index of the first
// character for which IsAuthorityTerminator() is true. When no such character
// exists before |spec_len| (including when |start_offset| is already at or
// past |spec_len|), |spec_len| itself is returned.
template<typename CHAR>
int FindNextAuthorityTerminator(const CHAR* spec,
                                int start_offset,
                                int spec_len) {
  int pos = start_offset;
  while (pos < spec_len && !IsAuthorityTerminator(spec[pos]))
    ++pos;

  // Clamp to |spec_len| so a start offset beyond the end still reports
  // "not found" as |spec_len|.
  return pos < spec_len ? pos : spec_len;
}
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseUserInfo(const CHAR* spec,
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                   const Component& user,
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                   Component* username,
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                   Component* password) {
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Find the first colon in the user section, which separates the username and
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // password.
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int colon_offset = 0;
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    colon_offset++;
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (colon_offset < user.len) {
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Found separator: <username>:<password>
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *username = Component(user.begin, colon_offset);
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *password = MakeRange(user.begin + colon_offset + 1,
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          user.begin + user.len);
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // No separator, treat everything as the username
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *username = user;
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *password = Component();
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseServerInfo(const CHAR* spec,
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     const Component& serverinfo,
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     Component* hostname,
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     Component* port_num) {
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (serverinfo.len == 0) {
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // No server info, host name is empty.
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    hostname->reset();
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    port_num->reset();
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If the host starts with a left-bracket, assume the entire host is an
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // IPv6 literal.  Otherwise, assume none of the host is an IPv6 literal.
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This assumption will be overridden if we find a right-bracket.
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Our IPv6 address canonicalization code requires both brackets to exist,
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // but the ability to locate an incomplete address can still be useful.
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int colon = -1;
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Find the last right-bracket, and the last colon.
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = serverinfo.begin; i < serverinfo.end(); i++) {
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    switch (spec[i]) {
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case ']':
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        ipv6_terminator = i;
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case ':':
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        colon = i;
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (colon > ipv6_terminator) {
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Found a port number: <hostname>:<port>
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *hostname = MakeRange(serverinfo.begin, colon);
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (hostname->len == 0)
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      hostname->reset();
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *port_num = MakeRange(colon + 1, serverinfo.end());
129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // No port: <hostname>
131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *hostname = serverinfo;
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    port_num->reset();
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Given an already-identified auth section, breaks it into its consituent
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// parts. The port number will be parsed and the resulting integer will be
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// filled into the given *port variable, or -1 if there is no port number or it
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// is invalid.
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParseAuthority(const CHAR* spec,
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      const Component& auth,
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      Component* username,
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      Component* password,
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      Component* hostname,
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      Component* port_num) {
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DCHECK(auth.is_valid()) << "We should always get an authority";
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (auth.len == 0) {
149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    username->reset();
150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    password->reset();
151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    hostname->reset();
152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    port_num->reset();
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Search backwards for @, which is the separator between the user info and
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the server info.
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int i = auth.begin + auth.len - 1;
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  while (i > auth.begin && spec[i] != '@')
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    i--;
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (spec[i] == '@') {
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Found user info: <user-info>@<server-info>
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ParseUserInfo(spec, Component(auth.begin, i - auth.begin),
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  username, password);
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len),
167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    hostname, port_num);
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // No user info, everything is server info.
170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    username->reset();
171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    password->reset();
172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ParseServerInfo(spec, auth, hostname, port_num);
173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePath(const CHAR* spec,
178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott               const Component& path,
179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott               Component* filepath,
180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott               Component* query,
181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott               Component* ref) {
182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Special case when there is no path.
185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (path.len == -1) {
186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    filepath->reset();
187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    query->reset();
188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ref->reset();
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DCHECK(path.len > 0) << "We should never have 0 length paths";
192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Search for first occurrence of either ? or #.
194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int path_end = path.begin + path.len;
195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int query_separator = -1;  // Index of the '?'
197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int ref_separator = -1;    // Index of the '#'
198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = path.begin; i < path_end; i++) {
199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    switch (spec[i]) {
200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case '?':
201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Only match the query string if it precedes the reference fragment
202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // and when we haven't found one already.
203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (ref_separator < 0 && query_separator < 0)
204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          query_separator = i;
205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case '#':
207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Record the first # sign only.
208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (ref_separator < 0)
209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          ref_separator = i;
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Markers pointing to the character after each of these corresponding
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // components. The code below words from the end back to the beginning,
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // and will update these indices as it finds components that exist.
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int file_end, query_end;
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Ref fragment: from the # to the end of the path.
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (ref_separator >= 0) {
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    file_end = query_end = ref_separator;
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *ref = MakeRange(ref_separator + 1, path_end);
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    file_end = query_end = path_end;
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ref->reset();
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Query fragment: everything from the ? to the next boundary (either the end
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // of the path or the ref fragment).
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (query_separator >= 0) {
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    file_end = query_separator;
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *query = MakeRange(query_separator + 1, query_end);
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    query->reset();
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // File path: treat an empty file path as no file path.
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (file_end != path.begin)
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    *filepath = MakeRange(path.begin, file_end);
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  else
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    filepath->reset();
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoExtractScheme(const CHAR* url,
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     int url_len,
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     Component* scheme) {
248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Skip leading whitespace and control characters.
249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int begin = 0;
250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  while (begin < url_len && ShouldTrimFromURL(url[begin]))
251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    begin++;
252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (begin == url_len)
253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;  // Input is empty or all whitespace.
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Find the first colon character.
256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = begin; i < url_len; i++) {
257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (url[i] == ':') {
258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      *scheme = MakeRange(begin, i);
259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return true;
260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return false;  // No colon found: no scheme
263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Fills in all members of the Parsed structure except for the scheme.
266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// |spec| is the full spec being parsed, of length |spec_len|.
268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// |after_scheme| is the character immediately following the scheme (after the
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//   colon) where we'll begin parsing.
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Compatability data points. I list "host", "path" extracted:
272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Input                IE6             Firefox                Us
273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// -----                --------------  --------------         --------------
274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http://foo.com/      "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:foo.com/        "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:/foo.com/       fail(*)         "foo.com", "/"         "foo.com", "/"
277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:\foo.com/       fail(*)         "\foo.com", "/"(fail)  "foo.com", "/"
278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// http:////foo.com/    "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// (*) Interestingly, although IE fails to load these URLs, its history
281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// canonicalizer handles them, meaning if you've been to the corresponding
282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// "http://foo.com/" link, it will be colored.
283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochtemplate <typename CHAR>
284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid DoParseAfterScheme(const CHAR* spec,
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                        int spec_len,
286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                        int after_scheme,
287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                        Parsed* parsed) {
288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int after_slashes = after_scheme + num_slashes;
290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // First split into two main parts, the authority (username, password, host,
292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // and port) and the full path (path, query, and reference).
293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Component authority;
294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Component full_path;
295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Found "//<some data>", looks like an authority section. Treat everything
297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // from there to the next slash (or end of spec) to be the authority. Note
298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // that we ignore the number of slashes and treat it as the authority.
299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  authority = Component(after_slashes, end_auth - after_slashes);
301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (end_auth == spec_len)  // No beginning of path found.
303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    full_path = Component();
304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  else  // Everything starting from the slash to the end is the path.
305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    full_path = Component(end_auth, spec_len - end_auth);
306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Now parse those two sub-parts.
308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                   &parsed->host, &parsed->port);
310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
313c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The main parsing function for standard URLs. Standard URLs have a scheme,
314c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// host, path, etc.
315c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
316c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {
317c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DCHECK(spec_len >= 0);
318c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
319c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Strip leading & trailing spaces and control characters.
320c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int begin = 0;
321c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  TrimURL(spec, &begin, &spec_len);
322c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
323c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int after_scheme;
324c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
325c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    after_scheme = parsed->scheme.end() + 1;  // Skip past the colon.
326c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
3273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // Say there's no scheme when there is no colon. We could also say that
328c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // everything is the scheme. Both would produce an invalid URL, but this way
329c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // seems less wrong in more cases.
330c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->scheme.reset();
331c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    after_scheme = begin;
332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
335c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Initializes a path URL which is merely a scheme followed by a path. Examples
337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// include "about:foo" and "javascript:alert('bar');"
338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
339c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) {
340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Get the non-path and non-scheme parts of the URL out of the way, we never
341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // use them.
342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->username.reset();
343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->password.reset();
344c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->host.reset();
345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->port.reset();
346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->query.reset();
347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->ref.reset();
348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Strip leading & trailing spaces and control characters.
350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int begin = 0;
351c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  TrimURL(spec, &begin, &spec_len);
352c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
353c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Handle empty specs or ones that contain only whitespace or control chars.
354c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (begin == spec_len) {
355c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->scheme.reset();
356c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->path.reset();
357c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
358c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
359c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
360c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Extract the scheme, with the path being everything following. We also
361c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // handle the case where there is no scheme.
362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
363c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Offset the results since we gave ExtractScheme a substring.
364c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->scheme.begin += begin;
365c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
366c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // For compatability with the standard URL parser, we treat no path as
367c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // -1, rather than having a length of 0 (we normally wouldn't care so
368c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // much for these non-standard URLs).
369c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (parsed->scheme.end() == spec_len - 1)
370c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      parsed->path.reset();
371c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    else
372c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len);
373c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // No scheme found, just path.
375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->scheme.reset();
376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->path = MakeRange(begin, spec_len);
377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {
382c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DCHECK(spec_len >= 0);
383c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
384c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Get the non-path and non-scheme parts of the URL out of the way, we never
385c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // use them.
386c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->username.reset();
387c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->password.reset();
388c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->host.reset();
389c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->port.reset();
390c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->ref.reset();
391c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  parsed->query.reset();  // May use this; reset for convenience.
392c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
393c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Strip leading & trailing spaces and control characters.
394c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int begin = 0;
395c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  TrimURL(spec, &begin, &spec_len);
396c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
397c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Handle empty specs or ones that contain only whitespace or control chars.
398c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (begin == spec_len) {
399c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->scheme.reset();
400c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->path.reset();
401c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
402c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
403c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
404c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int path_begin = -1;
405c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int path_end = -1;
406c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
407c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Extract the scheme, with the path being everything following. We also
408c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // handle the case where there is no scheme.
409c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
410c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Offset the results since we gave ExtractScheme a substring.
411c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->scheme.begin += begin;
412c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
413c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (parsed->scheme.end() != spec_len - 1) {
414c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      path_begin = parsed->scheme.end() + 1;
415c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      path_end = spec_len;
416c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
417c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
418c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // No scheme found, just path.
419c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->scheme.reset();
420c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    path_begin = begin;
421c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    path_end = spec_len;
422c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
423c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
424c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Split [path_begin, path_end) into a path + query.
425c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = path_begin; i < path_end; ++i) {
426c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (spec[i] == '?') {
427c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      parsed->query = MakeRange(i + 1, path_end);
428c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      path_end = i;
429c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      break;
430c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
431c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
432c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
433c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // For compatability with the standard URL parser, treat no path as
434c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // -1, rather than having a length of 0
435c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (path_begin == path_end) {
436c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->path.reset();
437c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } else {
438c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    parsed->path = MakeRange(path_begin, path_end);
439c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
440c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
441c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
442c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Converts a port number in a string to an integer. We'd like to just call
443c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
444c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// we copy the digits to a small stack buffer (since we know the maximum number
445c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// of digits in a valid port number) that we can NULL terminate.
446c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
447c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint DoParsePort(const CHAR* spec, const Component& component) {
448c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Easy success case when there is no port.
449c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const int kMaxDigits = 5;
450c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (!component.is_nonempty())
451c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return PORT_UNSPECIFIED;
452c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
453c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Skip over any leading 0s.
454c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  Component digits_comp(component.end(), 0);
455c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = 0; i < component.len; i++) {
456c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (spec[component.begin + i] != '0') {
457c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      digits_comp = MakeRange(component.begin + i, component.end());
458c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      break;
459c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
460c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
461c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (digits_comp.len == 0)
462c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return 0;  // All digits were 0.
463c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
464c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Verify we don't have too many digits (we'll be copying to our buffer so
465c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // we need to double-check).
466c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (digits_comp.len > kMaxDigits)
467c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return PORT_INVALID;
468c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
469c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Copy valid digits to the buffer.
470c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  char digits[kMaxDigits + 1];  // +1 for null terminator
471c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = 0; i < digits_comp.len; i++) {
472c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    CHAR ch = spec[digits_comp.begin + i];
473c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (!IsPortDigit(ch)) {
474c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // Invalid port digit, fail.
475c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return PORT_INVALID;
476c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
477c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    digits[i] = static_cast<char>(ch);
478c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
479c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
480c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Null-terminate the string and convert to integer. Since we guarantee
481c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // only digits, atoi's lack of error handling is OK.
482c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  digits[digits_comp.len] = 0;
483c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int port = atoi(digits);
484c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (port > 65535)
485c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return PORT_INVALID;  // Out of range.
486c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return port;
487c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
488c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
489c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
490c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid DoExtractFileName(const CHAR* spec,
491c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const Component& path,
492c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       Component* file_name) {
493c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Handle empty paths: they have no file names.
494c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (!path.is_nonempty()) {
495c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    file_name->reset();
496c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return;
497c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
498c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
499c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Search backwards for a parameter, which is a normally unused field in a
500c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // URL delimited by a semicolon. We parse the parameter as part of the
501c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // path, but here, we don't want to count it. The last semicolon is the
502c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // parameter. The path should start with a slash, so we don't need to check
503c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the first one.
504c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int file_end = path.end();
505c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = path.end() - 1; i > path.begin; i--) {
506c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (spec[i] == ';') {
507c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      file_end = i;
508c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      break;
509c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
510c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
511c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
512c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Now search backwards from the filename end to the previous slash
513c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // to find the beginning of the filename.
514c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (int i = file_end - 1; i >= path.begin; i--) {
515c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (IsURLSlash(spec[i])) {
516c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // File name is everything following this character to the end
517c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      *file_name = MakeRange(i + 1, file_end);
518c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return;
519c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
520c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
521c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
522c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // No slash found, this means the input was degenerate (generally paths
523c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // will start with a slash). Let's call everything the file name.
524c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  *file_name = MakeRange(path.begin, file_end);
525c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return;
526c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
527c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
528c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename CHAR>
529c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DoExtractQueryKeyValue(const CHAR* spec,
530c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            Component* query,
531c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            Component* key,
532c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            Component* value) {
533c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (!query->is_nonempty())
534c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;
535c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
536c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int start = query->begin;
537c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int cur = start;
538c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int end = query->end();
539c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
540c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // We assume the beginning of the input is the beginning of the "key" and we
541c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // skip to the end of it.
542c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  key->begin = cur;
543c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  while (cur < end && spec[cur] != '&' && spec[cur] != '=')
544c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur++;
545c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  key->len = cur - key->begin;
546c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
547c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Skip the separator after the key (if any).
548c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (cur < end && spec[cur] == '=')
549c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur++;
550c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
551c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Find the value part.
552c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  value->begin = cur;
553c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  while (cur < end && spec[cur] != '&')
554c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur++;
555c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  value->len = cur - value->begin;
556c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
557c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Finally skip the next separator if any
558c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (cur < end && spec[cur] == '&')
559c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur++;
560c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
561c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Save the new query
562c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  *query = url_parse::MakeRange(cur, end);
563c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return true;
564c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
565c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
566c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace
567c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
568513209b27ff55e2841eac0e4120199c23acce758Ben MurdochParsed::Parsed() {
569513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch}
570513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch
571c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint Parsed::Length() const {
572c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (ref.is_valid())
573c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return ref.end();
574c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return CountCharactersBefore(REF, false);
575c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
576c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
577c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint Parsed::CountCharactersBefore(ComponentType type,
578c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                  bool include_delimiter) const {
579c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (type == SCHEME)
580c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return scheme.begin;
581c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
582c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // There will be some characters after the scheme like "://" and we don't
583c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // know how many. Search forwards for the next thing until we find one.
584c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int cur = 0;
585c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (scheme.is_valid())
586c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur = scheme.end() + 1;  // Advance over the ':' at the end of the scheme.
587c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
588c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (username.is_valid()) {
589c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type <= USERNAME)
590c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return username.begin;
591c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur = username.end() + 1;  // Advance over the '@' or ':' at the end.
592c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
593c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
594c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (password.is_valid()) {
595c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type <= PASSWORD)
596c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return password.begin;
597c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur = password.end() + 1;  // Advance over the '@' at the end.
598c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
599c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
600c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (host.is_valid()) {
601c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type <= HOST)
602c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return host.begin;
603c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur = host.end();
604c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
605c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
606c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (port.is_valid()) {
607c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type < PORT || (type == PORT && include_delimiter))
608c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return port.begin - 1;  // Back over delimiter.
609c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type == PORT)
610c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return port.begin;  // Don't want delimiter counted.
611c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur = port.end();
612c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
613c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
614c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (path.is_valid()) {
615c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type <= PATH)
616c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return path.begin;
617c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur = path.end();
618c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
619c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
620c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (query.is_valid()) {
621c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type < QUERY || (type == QUERY && include_delimiter))
622c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return query.begin - 1;  // Back over delimiter.
623c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type == QUERY)
624c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return query.begin;  // Don't want delimiter counted.
625c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    cur = query.end();
626c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
627c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
628c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (ref.is_valid()) {
629c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (type == REF && !include_delimiter)
630c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      return ref.begin;  // Back over delimiter.
631c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
632c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // When there is a ref and we get here, the component we wanted was before
633c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // this and not found, so we always know the beginning of the ref is right.
634c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return ref.begin - 1;  // Don't want delimiter counted.
635c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
636c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
637c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return cur;
638c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
639c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
640c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractScheme(const char* url, int url_len, Component* scheme) {
641c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoExtractScheme(url, url_len, scheme);
642c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
643c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
644c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractScheme(const char16* url, int url_len, Component* scheme) {
645c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoExtractScheme(url, url_len, scheme);
646c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
647c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
648c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This handles everything that may be an authority terminator, including
649c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// backslash. For special backslash handling see DoParseAfterScheme.
650c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool IsAuthorityTerminator(char16 ch) {
6513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return IsURLSlash(ch) || ch == '?' || ch == '#';
652c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
653c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
654c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ExtractFileName(const char* url,
655c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     const Component& path,
656c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     Component* file_name) {
657c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoExtractFileName(url, path, file_name);
658c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
659c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
660c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ExtractFileName(const char16* url,
661c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     const Component& path,
662c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                     Component* file_name) {
663c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoExtractFileName(url, path, file_name);
664c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
665c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
666c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractQueryKeyValue(const char* url,
667c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          Component* query,
668c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          Component* key,
669c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          Component* value) {
670c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoExtractQueryKeyValue(url, query, key, value);
671c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
672c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
673c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool ExtractQueryKeyValue(const char16* url,
674c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          Component* query,
675c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          Component* key,
676c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          Component* value) {
677c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoExtractQueryKeyValue(url, query, key, value);
678c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
679c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
680c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseAuthority(const char* spec,
681c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    const Component& auth,
682c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* username,
683c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* password,
684c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* hostname,
685c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* port_num) {
686c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseAuthority(spec, auth, username, password, hostname, port_num);
687c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
688c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
689c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid ParseAuthority(const char16* spec,
690c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    const Component& auth,
691c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* username,
692c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* password,
693c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* hostname,
694c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    Component* port_num) {
695c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseAuthority(spec, auth, username, password, hostname, port_num);
696c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
697c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
698c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint ParsePort(const char* url, const Component& port) {
699c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoParsePort(url, port);
700c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
701c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
702c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint ParsePort(const char16* url, const Component& port) {
703c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return DoParsePort(url, port);
704c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
705c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
706c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
707c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseStandardURL(url, url_len, parsed);
708c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
709c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
710c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseStandardURL(const char16* url, int url_len, Parsed* parsed) {
711c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseStandardURL(url, url_len, parsed);
712c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
713c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
714c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathURL(const char* url, int url_len, Parsed* parsed) {
715c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParsePathURL(url, url_len, parsed);
716c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
717c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
718c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathURL(const char16* url, int url_len, Parsed* parsed) {
719c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParsePathURL(url, url_len, parsed);
720c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
721c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
722c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
723c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseMailtoURL(url, url_len, parsed);
724c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
725c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
726c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseMailtoURL(const char16* url, int url_len, Parsed* parsed) {
727c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseMailtoURL(url, url_len, parsed);
728c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
729c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
730c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathInternal(const char* spec,
731c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const Component& path,
732c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       Component* filepath,
733c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       Component* query,
734c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       Component* ref) {
735c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ParsePath(spec, path, filepath, query, ref);
736c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
737c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
738c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParsePathInternal(const char16* spec,
739c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       const Component& path,
740c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       Component* filepath,
741c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       Component* query,
742c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       Component* ref) {
743c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ParsePath(spec, path, filepath, query, ref);
744c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
745c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
746c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseAfterScheme(const char* spec,
747c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      int spec_len,
748c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      int after_scheme,
749c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      Parsed* parsed) {
750c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
751c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
752c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
753c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid ParseAfterScheme(const char16* spec,
754c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      int spec_len,
755c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      int after_scheme,
756c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      Parsed* parsed) {
757c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
758c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
759c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
760c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace url_parse
761