/* Based on nsURLParsers.cc from Mozilla
 * -------------------------------------
 * Copyright (C) 1998 Netscape Communications Corporation.
 *
 * Other contributors:
 *   Darin Fisher (original author)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Alternatively, the contents of this file may be used under the terms
 * of either the Mozilla Public License Version 1.1, found at
 * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
 * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
 * (the "GPL"), in which case the provisions of the MPL or the GPL are
 * applicable instead of those above.  If you wish to allow use of your
 * version of this file only under the terms of one of those two
 * licenses (the MPL or the GPL) and not to allow others to use your
 * version of this file under the LGPL, indicate your decision by
 * deleting the provisions above and replace them with the notice and
 * other provisions required by the MPL or the GPL, as the case may be.
336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen * If you do not delete the provisions above, a recipient may use your 346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen * version of this file under any of the LGPL, the MPL or the GPL. 356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen */ 366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#ifndef URLParser_h 386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#define URLParser_h 396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#include "URLComponent.h" 416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#include "URLSegments.h" 426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsennamespace WTF { 446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsentemplate<typename CHAR> 466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenclass URLParser { 476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenpublic: 486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen enum SpecialPort { 496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen UnspecifiedPort = -1, 506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen InvalidPort = -2, 516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen }; 526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // This handles everything that may be an authority terminator, including 546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // backslash. For special backslash handling see parseAfterScheme. 
556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static bool isPossibleAuthorityTerminator(CHAR ch) 566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return isURLSlash(ch) || ch == '?' || ch == '#' || ch == ';'; 586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Given an already-identified auth section, breaks it into its constituent 616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // parts. The port number will be parsed and the resulting integer will be 626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // filled into the given *port variable, or -1 if there is no port number 636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // or it is invalid. 646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parseAuthority(const CHAR* spec, const URLComponent& auth, URLComponent& username, URLComponent& password, URLComponent& host, URLComponent& port) 656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // FIXME: add ASSERT(auth.isValid()); // We should always get an authority. 
676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (!auth.length()) { 686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen username.reset(); 696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen password.reset(); 706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen host.reset(); 716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen port.reset(); 726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return; 736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Search backwards for @, which is the separator between the user info 766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // and the server info. RFC 3986 forbids @ from occuring in auth, but 776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // someone might include it in a password unescaped. 786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int i = auth.begin() + auth.length() - 1; 796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (i > auth.begin() && spec[i] != '@') 806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen --i; 816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (spec[i] == '@') { 836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Found user info: <user-info>@<server-info> 846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parseUserInfo(spec, URLComponent(auth.begin(), i - auth.begin()), username, password); 856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parseServerInfo(spec, URLComponent::fromRange(i + 1, auth.begin() + auth.length()), host, port); 866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } else { 876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // No user info, everything is server info. 
886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen username.reset(); 896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen password.reset(); 906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parseServerInfo(spec, auth, host, port); 916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static bool extractScheme(const CHAR* spec, int specLength, URLComponent& scheme) 956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Skip leading whitespace and control characters. 976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int begin = 0; 986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (begin < specLength && shouldTrimFromURL(spec[begin])) 996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen begin++; 1006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (begin == specLength) 1016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return false; // Input is empty or all whitespace. 1026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Find the first colon character. 
1046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = begin; i < specLength; i++) { 1056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (spec[i] == ':') { 1066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen scheme = URLComponent::fromRange(begin, i); 1076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return true; 1086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 1096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 1106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return false; // No colon found: no scheme 1116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 1126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Fills in all members of the URLSegments structure (except for the 1146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // scheme) for standard URLs. 1156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // 1166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // |spec| is the full spec being parsed, of length |specLength|. 1176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // |afterScheme| is the character immediately following the scheme (after 1186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // the colon) where we'll begin parsing. 
1196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parseAfterScheme(const CHAR* spec, int specLength, int afterScheme, URLSegments& parsed) 1206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 1216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int numberOfSlashes = consecutiveSlashes(spec, afterScheme, specLength); 1226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int afterSlashes = afterScheme + numberOfSlashes; 1236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // First split into two main parts, the authority (username, password, 1256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // host, and port) and the full path (path, query, and reference). 1266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen URLComponent authority; 1276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen URLComponent fullPath; 1286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Found "//<some data>", looks like an authority section. Treat 1306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // everything from there to the next slash (or end of spec) to be the 1316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // authority. Note that we ignore the number of slashes and treat it as 1326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // the authority. 1336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int authEnd = nextAuthorityTerminator(spec, afterSlashes, specLength); 1346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen authority = URLComponent(afterSlashes, authEnd - afterSlashes); 1356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (authEnd == specLength) // No beginning of path found. 
1376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fullPath = URLComponent(); 1386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen else // Everything starting from the slash to the end is the path. 1396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fullPath = URLComponent(authEnd, specLength - authEnd); 1406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Now parse those two sub-parts. 1426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parseAuthority(spec, authority, parsed.username, parsed.password, parsed.host, parsed.port); 1436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsePath(spec, fullPath, parsed.path, parsed.query, parsed.fragment); 1446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 1456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // The main parsing function for standard URLs. Standard URLs have a scheme, 1476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // host, path, etc. 1486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parseStandardURL(const CHAR* spec, int specLength, URLSegments& parsed) 1496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 1506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // FIXME: add ASSERT(specLength >= 0); 1516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Strip leading & trailing spaces and control characters. 
1536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int begin = 0; 1546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen trimURL(spec, begin, specLength); 1556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int afterScheme; 1576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (extractScheme(spec, specLength, parsed.scheme)) 1586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen afterScheme = parsed.scheme.end() + 1; // Skip past the colon. 1596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen else { 1606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Say there's no scheme when there is a colon. We could also say 1616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // that everything is the scheme. Both would produce an invalid 1626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // URL, but this way seems less wrong in more cases. 1636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.scheme.reset(); 1646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen afterScheme = begin; 1656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 1666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parseAfterScheme(spec, specLength, afterScheme, parsed); 1676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 1686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parsePath(const CHAR* spec, const URLComponent& path, URLComponent& filepath, URLComponent& query, URLComponent& fragment) 1706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 1716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<fragment> 1726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Special case when there is no path. 
1746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (!path.isValid()) { 1756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen filepath.reset(); 1766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen query.reset(); 1776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fragment.reset(); 1786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return; 1796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 1806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // FIXME: add ASSERT(path.length() > 0); // We should never have 0 length paths. 1816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Search for first occurrence of either ? or #. 1836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int pathEnd = path.begin() + path.length(); 1846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 1856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int querySeparator = -1; // Index of the '?' 1866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int refSeparator = -1; // Index of the '#' 1876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = path.begin(); i < pathEnd; i++) { 1886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen switch (spec[i]) { 1896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen case '?': 1906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (querySeparator < 0) 1916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen querySeparator = i; 1926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 1936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen case '#': 1946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen refSeparator = i; 1956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen i = pathEnd; // Break out of the loop. 
1966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 1976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen default: 1986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 1996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 2006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 2016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Markers pointing to the character after each of these corresponding 2036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // components. The code below works from the end back to the beginning, 2046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // and will update these indices as it finds components that exist. 2056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int fileEnd, queryEnd; 2066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Fragment: from the # to the end of the path. 2086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (refSeparator >= 0) { 2096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fileEnd = refSeparator; 2106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen queryEnd = refSeparator; 2116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fragment = URLComponent::fromRange(refSeparator + 1, pathEnd); 2126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } else { 2136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fileEnd = pathEnd; 2146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen queryEnd = pathEnd; 2156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fragment.reset(); 2166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 2176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Query fragment: everything from the ? 
to the next boundary (either 2196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // the end of the path or the fragment fragment). 2206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (querySeparator >= 0) { 2216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fileEnd = querySeparator; 2226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen query = URLComponent::fromRange(querySeparator + 1, queryEnd); 2236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } else 2246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen query.reset(); 2256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // File path: treat an empty file path as no file path. 2276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (fileEnd != path.begin()) 2286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen filepath = URLComponent::fromRange(path.begin(), fileEnd); 2296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen else 2306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen filepath.reset(); 2316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 2326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Initializes a path URL which is merely a scheme followed by a path. 2346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Examples include "about:foo" and "javascript:alert('bar');" 2356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parsePathURL(const CHAR* spec, int specLength, URLSegments& parsed) 2366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 2376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Get the non-path and non-scheme parts of the URL out of the way, we 2386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // never use them. 
2396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.username.reset(); 2406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.password.reset(); 2416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.host.reset(); 2426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.port.reset(); 2436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.query.reset(); 2446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.fragment.reset(); 2456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Strip leading & trailing spaces and control characters. 2476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // FIXME: Perhaps this is unnecessary? 2486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int begin = 0; 2496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen trimURL(spec, begin, specLength); 2506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Handle empty specs or ones that contain only whitespace or control 2526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // chars. 2536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (begin == specLength) { 2546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.scheme.reset(); 2556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.path.reset(); 2566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return; 2576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 2586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Extract the scheme, with the path being everything following. We also 2606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // handle the case where there is no scheme. 
2616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) { 2626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Offset the results since we gave extractScheme a substring. 2636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.scheme.setBegin(parsed.scheme.begin() + begin); 2646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // For compatibility with the standard URL parser, we treat no path 2666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // as -1, rather than having a length of 0 (we normally wouldn't 2676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // care so much for these non-standard URLs). 2686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (parsed.scheme.end() == specLength - 1) 2696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.path.reset(); 2706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen else 2716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.path = URLComponent::fromRange(parsed.scheme.end() + 1, specLength); 2726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } else { 2736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // No scheme found, just path. 
2746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.scheme.reset(); 2756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.path = URLComponent::fromRange(begin, specLength); 2766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 2776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 2786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parseMailtoURL(const CHAR* spec, int specLength, URLSegments& parsed) 2806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 2816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // FIXME: add ASSERT(specLength >= 0); 2826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Get the non-path and non-scheme parts of the URL out of the way, we 2846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // never use them. 2856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.username.reset(); 2866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.password.reset(); 2876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.host.reset(); 2886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.port.reset(); 2896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.fragment.reset(); 2906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.query.reset(); // May use this; reset for convenience. 2916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Strip leading & trailing spaces and control characters. 
2936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int begin = 0; 2946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen trimURL(spec, begin, specLength); 2956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 2966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Handle empty specs or ones that contain only whitespace or control 2976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // chars. 2986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (begin == specLength) { 2996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.scheme.reset(); 3006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.path.reset(); 3016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return; 3026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int pathBegin = -1; 3056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int pathEnd = -1; 3066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Extract the scheme, with the path being everything following. We also 3086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // handle the case where there is no scheme. 3096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) { 3106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Offset the results since we gave extractScheme a substring. 
3116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.scheme.setBegin(parsed.scheme.begin() + begin); 3126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (parsed.scheme.end() != specLength - 1) { 3146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen pathBegin = parsed.scheme.end() + 1; 3156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen pathEnd = specLength; 3166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } else { 3186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // No scheme found, just path. 3196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.scheme.reset(); 3206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen pathBegin = begin; 3216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen pathEnd = specLength; 3226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Split [pathBegin, pathEnd) into a path + query. 
3256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = pathBegin; i < pathEnd; ++i) { 3266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (spec[i] == '?') { 3276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.query = URLComponent::fromRange(i + 1, pathEnd); 3286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen pathEnd = i; 3296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 3306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // For compatibility with the standard URL parser, treat no path as 3346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // -1, rather than having a length of 0 3356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (pathBegin == pathEnd) 3366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.path.reset(); 3376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen else 3386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen parsed.path = URLComponent::fromRange(pathBegin, pathEnd); 3396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static int parsePort(const CHAR* spec, const URLComponent& component) 3426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 3436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Easy success case when there is no port. 
3446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen const int maxDigits = 5; 3456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (component.isEmptyOrInvalid()) 3466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return UnspecifiedPort; 3476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen URLComponent nonZeroDigits(component.end(), 0); 3496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = 0; i < component.length(); ++i) { 3506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (spec[component.begin() + i] != '0') { 3516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen nonZeroDigits = URLComponent::fromRange(component.begin() + i, component.end()); 3526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 3536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (!nonZeroDigits.length()) 3566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return 0; // All digits were 0. 
3576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (nonZeroDigits.length() > maxDigits) 3596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return InvalidPort; 3606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int port = 0; 3626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = 0; i < nonZeroDigits.length(); ++i) { 3636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen CHAR ch = spec[nonZeroDigits.begin() + i]; 3646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (!isPortDigit(ch)) 3656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return InvalidPort; 3666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen port *= 10; 3676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen port += static_cast<char>(ch) - '0'; 3686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (port > 65535) 3706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return InvalidPort; 3716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return port; 3726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void extractFileName(const CHAR* spec, const URLComponent& path, URLComponent& fileName) 3756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 3766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Handle empty paths: they have no file names. 
3776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (path.isEmptyOrInvalid()) { 3786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fileName.reset(); 3796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return; 3806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Search backwards for a parameter, which is a normally unused field 3836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // in a URL delimited by a semicolon. We parse the parameter as part of 3846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // the path, but here, we don't want to count it. The last semicolon is 3856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // the parameter. 3866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int fileEnd = path.end(); 3876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = path.end() - 1; i > path.begin(); --i) { 3886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (spec[i] == ';') { 3896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fileEnd = i; 3906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 3916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 3936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 3946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Now search backwards from the filename end to the previous slash 3956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // to find the beginning of the filename. 
3966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = fileEnd - 1; i >= path.begin(); --i) { 3976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (isURLSlash(spec[i])) { 3986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // File name is everything following this character to the end 3996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fileName = URLComponent::fromRange(i + 1, fileEnd); 4006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return; 4016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // No slash found, this means the input was degenerate (generally paths 4056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // will start with a slash). Let's call everything the file name. 4066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen fileName = URLComponent::fromRange(path.begin(), fileEnd); 4076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static bool extractQueryKeyValue(const CHAR* spec, URLComponent& query, URLComponent& key, URLComponent& value) 4106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 4116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (query.isEmptyOrInvalid()) 4126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return false; 4136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int start = query.begin(); 4156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int current = start; 4166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int end = query.end(); 4176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // We 
assume the beginning of the input is the beginning of the "key" 4196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // and we skip to the end of it. 4206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen key.setBegin(current); 4216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (current < end && spec[current] != '&' && spec[current] != '=') 4226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ++current; 4236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen key.setLength(current - key.begin()); 4246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Skip the separator after the key (if any). 4266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (current < end && spec[current] == '=') 4276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ++current; 4286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Find the value part. 
4306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen value.setBegin(current); 4316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (current < end && spec[current] != '&') 4326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ++current; 4336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen value.setLength(current - value.begin()); 4346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Finally skip the next separator if any 4366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (current < end && spec[current] == '&') 4376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ++current; 4386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Save the new query 4406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen query = URLComponent::fromRange(current, end); 4416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return true; 4426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen// FIXME: This should be protected or private. 4456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenpublic: 4466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // We treat slashes and backslashes the same for IE compatibility. 
4476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static inline bool isURLSlash(CHAR ch) 4486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 4496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return ch == '/' || ch == '\\'; 4506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Returns true if we should trim this character from the URL because it is 4536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // a space or a control character. 4546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static inline bool shouldTrimFromURL(CHAR ch) 4556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 4566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return ch <= ' '; 4576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Given an already-initialized begin index and end index (the index after 4606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // the last CHAR in spec), this shrinks the range to eliminate 4616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // "should-be-trimmed" characters. 4626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static inline void trimURL(const CHAR* spec, int& begin, int& end) 4636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 4646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Strip leading whitespace and control characters. 4656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (begin < end && shouldTrimFromURL(spec[begin])) 4666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ++begin; 4676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Strip trailing whitespace and control characters. 
We need the >i 4696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // test for when the input string is all blanks; we don't want to back 4706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // past the input. 4716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (end > begin && shouldTrimFromURL(spec[end - 1])) 4726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen --end; 4736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Counts the number of consecutive slashes starting at the given offset 4766c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // in the given string of the given length. 4776c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static inline int consecutiveSlashes(const CHAR *string, int beginOffset, int stringLength) 4786c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 4796c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int count = 0; 4806c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (beginOffset + count < stringLength && isURLSlash(string[beginOffset + count])) 4816c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ++count; 4826c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return count; 4836c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4846c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4856c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsenprivate: 4866c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // URLParser cannot be constructed. 4876c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen URLParser(); 4886c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4896c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Returns true if the given character is a valid digit to use in a port. 
4906c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static inline bool isPortDigit(CHAR ch) 4916c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 4926c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return ch >= '0' && ch <= '9'; 4936c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 4946c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 4956c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Returns the offset of the next authority terminator in the input starting 4966c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // from startOffset. If no terminator is found, the return value will be equal 4976c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // to specLength. 4986c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static int nextAuthorityTerminator(const CHAR* spec, int startOffset, int specLength) 4996c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 5006c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = startOffset; i < specLength; i++) { 5016c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (isPossibleAuthorityTerminator(spec[i])) 5026c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return i; 5036c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5046c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return specLength; // Not found. 5056c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5066c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5076c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parseUserInfo(const CHAR* spec, const URLComponent& user, URLComponent& username, URLComponent& password) 5086c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 5096c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Find the first colon in the user section, which separates the 5106c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // username and password. 
5116c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int colonOffset = 0; 5126c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen while (colonOffset < user.length() && spec[user.begin() + colonOffset] != ':') 5136c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ++colonOffset; 5146c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5156c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (colonOffset < user.length()) { 5166c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Found separator: <username>:<password> 5176c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen username = URLComponent(user.begin(), colonOffset); 5186c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen password = URLComponent::fromRange(user.begin() + colonOffset + 1, user.begin() + user.length()); 5196c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } else { 5206c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // No separator, treat everything as the username 5216c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen username = user; 5226c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen password = URLComponent(); 5236c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5246c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5256c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5266c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen static void parseServerInfo(const CHAR* spec, const URLComponent& serverInfo, URLComponent& host, URLComponent& port) 5276c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen { 5286c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (!serverInfo.length()) { 5296c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // No server info, host name is empty. 
5306c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen host.reset(); 5316c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen port.reset(); 5326c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen return; 5336c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5346c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5356c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // If the host starts with a left-bracket, assume the entire host is an 5366c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal. 5376c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // This assumption will be overridden if we find a right-bracket. 5386c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // 5396c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Our IPv6 address canonicalization code requires both brackets to 5406c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // exist, but the ability to locate an incomplete address can still be 5416c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // useful. 5426c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int ipv6Terminator = spec[serverInfo.begin()] == '[' ? serverInfo.end() : -1; 5436c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen int colon = -1; 5446c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5456c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Find the last right-bracket, and the last colon. 
5466c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen for (int i = serverInfo.begin(); i < serverInfo.end(); i++) { 5476c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen switch (spec[i]) { 5486c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen case ']': 5496c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen ipv6Terminator = i; 5506c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 5516c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen case ':': 5526c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen colon = i; 5536c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 5546c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen default: 5556c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen break; 5566c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5576c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5586c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5596c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (colon > ipv6Terminator) { 5606c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // Found a port number: <hostname>:<port> 5616c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen host = URLComponent::fromRange(serverInfo.begin(), colon); 5626c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen if (!host.length()) 5636c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen host.reset(); 5646c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen port = URLComponent::fromRange(colon + 1, serverInfo.end()); 5656c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } else { 5666c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen // No port: <hostname> 5676c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen host = serverInfo; 5686c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen port.reset(); 5696c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5706c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen } 5716c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen}; 
5726c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5736c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen} // namespace WTF 5746c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen 5756c2af9490927c3c5959b5cb07461b646f8b32f6cKristian Monsen#endif // URLParser_h 576