/* Based on nsURLParsers.cc from Mozilla
 * -------------------------------------
 * Copyright (C) 1998 Netscape Communications Corporation.
 *
 * Other contributors:
 *   Darin Fisher (original author)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Alternatively, the contents of this file may be used under the terms
 * of either the Mozilla Public License Version 1.1, found at
 * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
 * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
 * (the "GPL"), in which case the provisions of the MPL or the GPL are
 * applicable instead of those above.  If you wish to allow use of your
 * version of this file only under the terms of one of those two
 * licenses (the MPL or the GPL) and not to allow others to use your
 * version of this file under the LGPL, indicate your decision by
 * deleting the provisions above and replace them with the notice and
 * other provisions required by the MPL or the GPL, as the case may be.
 * If you do not delete the provisions above, a recipient may use your
 * version of this file under any of the LGPL, the MPL or the GPL.
 */
366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#ifndef URLParser_h
386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#define URLParser_h
396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#include "URLComponent.h"
416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#include "URLSegments.h"
426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsennamespace WTF {
446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsentemplate<typename CHAR>
466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenclass URLParser {
476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenpublic:
486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    enum SpecialPort {
496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        UnspecifiedPort = -1,
506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        InvalidPort = -2,
516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    };
526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // This handles everything that may be an authority terminator, including
546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // backslash. For special backslash handling see parseAfterScheme.
556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static bool isPossibleAuthorityTerminator(CHAR ch)
566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return isURLSlash(ch) || ch == '?' || ch == '#' || ch == ';';
586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Given an already-identified auth section, breaks it into its constituent
616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // parts. The port number will be parsed and the resulting integer will be
626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // filled into the given *port variable, or -1 if there is no port number
636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // or it is invalid.
646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parseAuthority(const CHAR* spec, const URLComponent& auth, URLComponent& username, URLComponent& password, URLComponent& host, URLComponent& port)
656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // FIXME: add ASSERT(auth.isValid()); // We should always get an authority.
676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (!auth.length()) {
686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            username.reset();
696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            password.reset();
706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            host.reset();
716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            port.reset();
726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return;
736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Search backwards for @, which is the separator between the user info
766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // and the server info.  RFC 3986 forbids @ from occuring in auth, but
776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // someone might include it in a password unescaped.
786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int i = auth.begin() + auth.length() - 1;
796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (i > auth.begin() && spec[i] != '@')
806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            --i;
816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (spec[i] == '@') {
836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // Found user info: <user-info>@<server-info>
846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parseUserInfo(spec, URLComponent(auth.begin(), i - auth.begin()), username, password);
856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parseServerInfo(spec, URLComponent::fromRange(i + 1, auth.begin() + auth.length()), host, port);
866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        } else {
876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // No user info, everything is server info.
886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            username.reset();
896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            password.reset();
906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parseServerInfo(spec, auth, host, port);
916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static bool extractScheme(const CHAR* spec, int specLength, URLComponent& scheme)
956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Skip leading whitespace and control characters.
976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int begin = 0;
986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (begin < specLength && shouldTrimFromURL(spec[begin]))
996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            begin++;
1006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (begin == specLength)
1016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return false; // Input is empty or all whitespace.
1026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Find the first colon character.
1046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = begin; i < specLength; i++) {
1056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (spec[i] == ':') {
1066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                scheme = URLComponent::fromRange(begin, i);
1076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                return true;
1086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
1096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
1106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return false; // No colon found: no scheme
1116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
1126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Fills in all members of the URLSegments structure (except for the
1146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // scheme) for standard URLs.
1156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    //
1166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // |spec| is the full spec being parsed, of length |specLength|.
1176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // |afterScheme| is the character immediately following the scheme (after
1186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // the colon) where we'll begin parsing.
1196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parseAfterScheme(const CHAR* spec, int specLength, int afterScheme, URLSegments& parsed)
1206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
1216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int numberOfSlashes = consecutiveSlashes(spec, afterScheme, specLength);
1226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int afterSlashes = afterScheme + numberOfSlashes;
1236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // First split into two main parts, the authority (username, password,
1256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // host, and port) and the full path (path, query, and reference).
1266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        URLComponent authority;
1276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        URLComponent fullPath;
1286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Found "//<some data>", looks like an authority section. Treat
1306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // everything from there to the next slash (or end of spec) to be the
1316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // authority. Note that we ignore the number of slashes and treat it as
1326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // the authority.
1336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int authEnd = nextAuthorityTerminator(spec, afterSlashes, specLength);
1346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        authority = URLComponent(afterSlashes, authEnd - afterSlashes);
1356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (authEnd == specLength) // No beginning of path found.
1376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fullPath = URLComponent();
1386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        else // Everything starting from the slash to the end is the path.
1396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fullPath = URLComponent(authEnd, specLength - authEnd);
1406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Now parse those two sub-parts.
1426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parseAuthority(spec, authority, parsed.username, parsed.password, parsed.host, parsed.port);
1436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsePath(spec, fullPath, parsed.path, parsed.query, parsed.fragment);
1446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
1456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // The main parsing function for standard URLs. Standard URLs have a scheme,
1476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // host, path, etc.
1486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parseStandardURL(const CHAR* spec, int specLength, URLSegments& parsed)
1496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
1506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // FIXME: add ASSERT(specLength >= 0);
1516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Strip leading & trailing spaces and control characters.
1536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int begin = 0;
1546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        trimURL(spec, begin, specLength);
1556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int afterScheme;
1576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (extractScheme(spec, specLength, parsed.scheme))
1586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            afterScheme = parsed.scheme.end() + 1; // Skip past the colon.
1596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        else {
1606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // Say there's no scheme when there is a colon. We could also say
1616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // that everything is the scheme. Both would produce an invalid
1626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // URL, but this way seems less wrong in more cases.
1636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.scheme.reset();
1646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            afterScheme = begin;
1656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
1666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parseAfterScheme(spec, specLength, afterScheme, parsed);
1676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
1686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parsePath(const CHAR* spec, const URLComponent& path, URLComponent& filepath, URLComponent& query, URLComponent& fragment)
1706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
1716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<fragment>
1726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Special case when there is no path.
1746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (!path.isValid()) {
1756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            filepath.reset();
1766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            query.reset();
1776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fragment.reset();
1786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return;
1796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
1806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // FIXME: add ASSERT(path.length() > 0); // We should never have 0 length paths.
1816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Search for first occurrence of either ? or #.
1836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int pathEnd = path.begin() + path.length();
1846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
1856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int querySeparator = -1; // Index of the '?'
1866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int refSeparator = -1; // Index of the '#'
1876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = path.begin(); i < pathEnd; i++) {
1886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            switch (spec[i]) {
1896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            case '?':
1906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                if (querySeparator < 0)
1916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                    querySeparator = i;
1926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
1936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            case '#':
1946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                refSeparator = i;
1956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                i = pathEnd; // Break out of the loop.
1966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
1976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            default:
1986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
1996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
2006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
2016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Markers pointing to the character after each of these corresponding
2036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // components. The code below works from the end back to the beginning,
2046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // and will update these indices as it finds components that exist.
2056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int fileEnd, queryEnd;
2066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Fragment: from the # to the end of the path.
2086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (refSeparator >= 0) {
2096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fileEnd = refSeparator;
2106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            queryEnd = refSeparator;
2116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fragment = URLComponent::fromRange(refSeparator + 1, pathEnd);
2126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        } else {
2136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fileEnd = pathEnd;
2146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            queryEnd = pathEnd;
2156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fragment.reset();
2166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
2176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Query fragment: everything from the ? to the next boundary (either
2196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // the end of the path or the fragment fragment).
2206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (querySeparator >= 0) {
2216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fileEnd = querySeparator;
2226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            query = URLComponent::fromRange(querySeparator + 1, queryEnd);
2236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        } else
2246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            query.reset();
2256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // File path: treat an empty file path as no file path.
2276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (fileEnd != path.begin())
2286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            filepath = URLComponent::fromRange(path.begin(), fileEnd);
2296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        else
2306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            filepath.reset();
2316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
2326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Initializes a path URL which is merely a scheme followed by a path.
2346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Examples include "about:foo" and "javascript:alert('bar');"
2356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parsePathURL(const CHAR* spec, int specLength, URLSegments& parsed)
2366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
2376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Get the non-path and non-scheme parts of the URL out of the way, we
2386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // never use them.
2396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.username.reset();
2406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.password.reset();
2416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.host.reset();
2426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.port.reset();
2436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.query.reset();
2446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.fragment.reset();
2456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Strip leading & trailing spaces and control characters.
2476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // FIXME: Perhaps this is unnecessary?
2486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int begin = 0;
2496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        trimURL(spec, begin, specLength);
2506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Handle empty specs or ones that contain only whitespace or control
2526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // chars.
2536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (begin == specLength) {
2546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.scheme.reset();
2556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.path.reset();
2566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return;
2576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
2586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Extract the scheme, with the path being everything following. We also
2606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // handle the case where there is no scheme.
2616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) {
2626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // Offset the results since we gave extractScheme a substring.
2636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.scheme.setBegin(parsed.scheme.begin() + begin);
2646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // For compatibility with the standard URL parser, we treat no path
2666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // as -1, rather than having a length of 0 (we normally wouldn't
2676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // care so much for these non-standard URLs).
2686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (parsed.scheme.end() == specLength - 1)
2696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                parsed.path.reset();
2706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            else
2716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                parsed.path = URLComponent::fromRange(parsed.scheme.end() + 1, specLength);
2726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        } else {
2736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // No scheme found, just path.
2746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.scheme.reset();
2756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.path = URLComponent::fromRange(begin, specLength);
2766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
2776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
2786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parseMailtoURL(const CHAR* spec, int specLength, URLSegments& parsed)
2806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
2816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // FIXME: add ASSERT(specLength >= 0);
2826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Get the non-path and non-scheme parts of the URL out of the way, we
2846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // never use them.
2856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.username.reset();
2866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.password.reset();
2876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.host.reset();
2886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.port.reset();
2896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.fragment.reset();
2906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        parsed.query.reset(); // May use this; reset for convenience.
2916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Strip leading & trailing spaces and control characters.
2936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int begin = 0;
2946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        trimURL(spec, begin, specLength);
2956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
2966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Handle empty specs or ones that contain only whitespace or control
2976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // chars.
2986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (begin == specLength) {
2996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.scheme.reset();
3006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.path.reset();
3016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return;
3026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
3036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int pathBegin = -1;
3056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int pathEnd = -1;
3066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Extract the scheme, with the path being everything following. We also
3086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // handle the case where there is no scheme.
3096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) {
3106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // Offset the results since we gave extractScheme a substring.
3116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.scheme.setBegin(parsed.scheme.begin() + begin);
3126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (parsed.scheme.end() != specLength - 1) {
3146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                pathBegin = parsed.scheme.end() + 1;
3156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                pathEnd = specLength;
3166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
3176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        } else {
3186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // No scheme found, just path.
3196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.scheme.reset();
3206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            pathBegin = begin;
3216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            pathEnd = specLength;
3226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
3236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Split [pathBegin, pathEnd) into a path + query.
3256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = pathBegin; i < pathEnd; ++i) {
3266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (spec[i] == '?') {
3276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                parsed.query = URLComponent::fromRange(i + 1, pathEnd);
3286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                pathEnd = i;
3296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
3306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
3316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
3326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // For compatibility with the standard URL parser, treat no path as
3346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // -1, rather than having a length of 0
3356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (pathBegin == pathEnd)
3366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.path.reset();
3376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        else
3386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            parsed.path = URLComponent::fromRange(pathBegin, pathEnd);
3396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
3406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static int parsePort(const CHAR* spec, const URLComponent& component)
3426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
3436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Easy success case when there is no port.
3446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        const int maxDigits = 5;
3456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (component.isEmptyOrInvalid())
3466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return UnspecifiedPort;
3476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        URLComponent nonZeroDigits(component.end(), 0);
3496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = 0; i < component.length(); ++i) {
3506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (spec[component.begin() + i] != '0') {
3516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                nonZeroDigits = URLComponent::fromRange(component.begin() + i, component.end());
3526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
3536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
3546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
3556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (!nonZeroDigits.length())
3566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return 0; // All digits were 0.
3576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (nonZeroDigits.length() > maxDigits)
3596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return InvalidPort;
3606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int port = 0;
3626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = 0; i < nonZeroDigits.length(); ++i) {
3636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            CHAR ch = spec[nonZeroDigits.begin() + i];
3646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (!isPortDigit(ch))
3656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                return InvalidPort;
3666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            port *= 10;
3676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            port += static_cast<char>(ch) - '0';
3686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
3696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (port > 65535)
3706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return InvalidPort;
3716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return port;
3726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
3736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void extractFileName(const CHAR* spec, const URLComponent& path, URLComponent& fileName)
3756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
3766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Handle empty paths: they have no file names.
3776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (path.isEmptyOrInvalid()) {
3786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            fileName.reset();
3796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return;
3806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
3816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Search backwards for a parameter, which is a normally unused field
3836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // in a URL delimited by a semicolon. We parse the parameter as part of
3846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // the path, but here, we don't want to count it. The last semicolon is
3856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // the parameter.
3866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int fileEnd = path.end();
3876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = path.end() - 1; i > path.begin(); --i) {
3886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (spec[i] == ';') {
3896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                fileEnd = i;
3906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
3916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
3926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
3936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
3946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Now search backwards from the filename end to the previous slash
3956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // to find the beginning of the filename.
3966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = fileEnd - 1; i >= path.begin(); --i) {
3976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (isURLSlash(spec[i])) {
3986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                // File name is everything following this character to the end
3996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                fileName = URLComponent::fromRange(i + 1, fileEnd);
4006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                return;
4016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
4026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
4036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // No slash found, this means the input was degenerate (generally paths
4056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // will start with a slash). Let's call everything the file name.
4066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        fileName = URLComponent::fromRange(path.begin(), fileEnd);
4076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
4086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static bool extractQueryKeyValue(const CHAR* spec, URLComponent& query, URLComponent& key, URLComponent& value)
4106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
4116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (query.isEmptyOrInvalid())
4126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return false;
4136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int start = query.begin();
4156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int current = start;
4166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int end = query.end();
4176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // We assume the beginning of the input is the beginning of the "key"
4196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // and we skip to the end of it.
4206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        key.setBegin(current);
4216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (current < end && spec[current] != '&' && spec[current] != '=')
4226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            ++current;
4236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        key.setLength(current - key.begin());
4246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Skip the separator after the key (if any).
4266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (current < end && spec[current] == '=')
4276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            ++current;
4286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Find the value part.
4306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        value.setBegin(current);
4316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (current < end && spec[current] != '&')
4326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            ++current;
4336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        value.setLength(current - value.begin());
4346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Finally skip the next separator if any
4366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (current < end && spec[current] == '&')
4376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            ++current;
4386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Save the new query
4406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        query = URLComponent::fromRange(current, end);
4416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return true;
4426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
4436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen// FIXME: This should be protected or private.
4456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenpublic:
4466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // We treat slashes and backslashes the same for IE compatibility.
4476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static inline bool isURLSlash(CHAR ch)
4486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
4496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return ch == '/' || ch == '\\';
4506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
4516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Returns true if we should trim this character from the URL because it is
4536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // a space or a control character.
4546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static inline bool shouldTrimFromURL(CHAR ch)
4556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
4566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return ch <= ' ';
4576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
4586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Given an already-initialized begin index and end index (the index after
4606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // the last CHAR in spec), this shrinks the range to eliminate
4616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // "should-be-trimmed" characters.
4626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static inline void trimURL(const CHAR* spec, int& begin, int& end)
4636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
4646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Strip leading whitespace and control characters.
4656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (begin < end && shouldTrimFromURL(spec[begin]))
4666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            ++begin;
4676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Strip trailing whitespace and control characters. We need the >i
4696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // test for when the input string is all blanks; we don't want to back
4706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // past the input.
4716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (end > begin && shouldTrimFromURL(spec[end - 1]))
4726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            --end;
4736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
4746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Counts the number of consecutive slashes starting at the given offset
4766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // in the given string of the given length.
4776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static inline int consecutiveSlashes(const CHAR *string, int beginOffset, int stringLength)
4786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
4796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int count = 0;
4806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (beginOffset + count < stringLength && isURLSlash(string[beginOffset + count]))
4816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            ++count;
4826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return count;
4836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
4846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenprivate:
4866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // URLParser cannot be constructed.
4876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    URLParser();
4886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Returns true if the given character is a valid digit to use in a port.
4906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static inline bool isPortDigit(CHAR ch)
4916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
4926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return ch >= '0' && ch <= '9';
4936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
4946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
4956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // Returns the offset of the next authority terminator in the input starting
4966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // from startOffset. If no terminator is found, the return value will be equal
4976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    // to specLength.
4986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static int nextAuthorityTerminator(const CHAR* spec, int startOffset, int specLength)
4996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
5006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = startOffset; i < specLength; i++) {
5016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (isPossibleAuthorityTerminator(spec[i]))
5026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                return i;
5036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
5046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        return specLength; // Not found.
5056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
5066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parseUserInfo(const CHAR* spec, const URLComponent& user, URLComponent& username, URLComponent& password)
5086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
5096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Find the first colon in the user section, which separates the
5106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // username and password.
5116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int colonOffset = 0;
5126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        while (colonOffset < user.length() && spec[user.begin() + colonOffset] != ':')
5136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            ++colonOffset;
5146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (colonOffset < user.length()) {
5166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // Found separator: <username>:<password>
5176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            username = URLComponent(user.begin(), colonOffset);
5186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            password = URLComponent::fromRange(user.begin() + colonOffset + 1, user.begin() + user.length());
5196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        } else {
5206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // No separator, treat everything as the username
5216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            username = user;
5226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            password = URLComponent();
5236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
5246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
5256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    static void parseServerInfo(const CHAR* spec, const URLComponent& serverInfo, URLComponent& host, URLComponent& port)
5276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    {
5286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (!serverInfo.length()) {
5296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // No server info, host name is empty.
5306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            host.reset();
5316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            port.reset();
5326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            return;
5336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
5346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // If the host starts with a left-bracket, assume the entire host is an
5366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // IPv6 literal.  Otherwise, assume none of the host is an IPv6 literal.
5376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // This assumption will be overridden if we find a right-bracket.
5386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        //
5396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Our IPv6 address canonicalization code requires both brackets to
5406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // exist, but the ability to locate an incomplete address can still be
5416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // useful.
5426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int ipv6Terminator = spec[serverInfo.begin()] == '[' ? serverInfo.end() : -1;
5436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        int colon = -1;
5446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        // Find the last right-bracket, and the last colon.
5466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        for (int i = serverInfo.begin(); i < serverInfo.end(); i++) {
5476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            switch (spec[i]) {
5486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            case ']':
5496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                ipv6Terminator = i;
5506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
5516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            case ':':
5526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                colon = i;
5536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
5546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            default:
5556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                break;
5566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            }
5576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
5586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        if (colon > ipv6Terminator) {
5606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // Found a port number: <hostname>:<port>
5616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            host = URLComponent::fromRange(serverInfo.begin(), colon);
5626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            if (!host.length())
5636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen                host.reset();
5646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            port = URLComponent::fromRange(colon + 1, serverInfo.end());
5656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        } else {
5666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            // No port: <hostname>
5676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            host = serverInfo;
5686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen            port.reset();
5696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen        }
5706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen    }
5716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen};
5726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen} // namespace WTF
5746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen
5756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#endif // URLParser_h
576