18f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian/*
25f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Copyright (C) 2004, 2007, 2008, 2009 Apple Inc. All rights reserved.
32fc2651226baac27029e38c9d6ef883fa32084dbSteve Block * Copyright (C) 2008, 2009, 2011 Google Inc. All rights reserved.
48f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian *
58f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * Redistribution and use in source and binary forms, with or without
68f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * modification, are permitted provided that the following conditions are
78f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * met:
88f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian *
98f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian *     * Redistributions of source code must retain the above copyright
108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * notice, this list of conditions and the following disclaimer.
118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian *     * Redistributions in binary form must reproduce the above
128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * copyright notice, this list of conditions and the following disclaimer
138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * in the documentation and/or other materials provided with the
148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * distribution.
158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian *     * Neither the name of Google Inc. nor the names of its
168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * contributors may be used to endorse or promote products derived from
178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * this software without specific prior written permission.
188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian *
198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
218f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
228f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
238f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
268f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
278f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian */
318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#include "config.h"
338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#if USE(GOOGLEURL)
358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#include "KURL.h"
368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
378f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#ifndef NDEBUG
388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#include <stdio.h>
398f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#endif
408f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
41643ca7872b450ea4efacab6188849e5aac2ba161Steve Block#include <algorithm>
42643ca7872b450ea4efacab6188849e5aac2ba161Steve Block
438f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#include "NotImplemented.h"
448f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#include "TextEncoding.h"
45d0825bca7fe65beaee391d30da42e937db621564Steve Block#include <wtf/HashMap.h>
468f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#include <wtf/Vector.h>
47643ca7872b450ea4efacab6188849e5aac2ba161Steve Block#include <wtf/StdLibExtras.h>
48dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block#include <wtf/text/CString.h>
49f486d19d62f1bc33246748b14b14a9dfa617b57fIain Merrick#include <wtf/text/StringHash.h>
508f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
518f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#include <googleurl/src/url_util.h>
528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
538f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianusing WTF::isASCIILower;
548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianusing WTF::toASCIILower;
55643ca7872b450ea4efacab6188849e5aac2ba161Steve Blockusing std::binary_search;
568f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qiannamespace WebCore {
588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
// Port-number limits. 0xFFFF is set aside as the "invalid" sentinel, which
// makes 0xFFFE the largest value treated as a real port.
// NOTE(review): the code that consumes these is outside this chunk - confirm.
static const int invalidPortNumber = 0xFFFF;
static const int maximumValidPortNumber = invalidPortNumber - 1;
61dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block
628f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Wraps WebCore's text encoding in a character set converter for the
638f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// canonicalizer.
648f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianclass KURLCharsetConverter : public url_canon::CharsetConverter {
658f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianpublic:
662fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // The encoding parameter may be 0, but in this case the object must not be called.
678f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLCharsetConverter(const TextEncoding* encoding)
688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        : m_encoding(encoding)
698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    {
708f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
728f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    virtual void ConvertFromUTF16(const url_parse::UTF16Char* input, int inputLength,
738f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                                  url_canon::CanonOutput* output)
748f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    {
758f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        CString encoded = m_encoding->encode(input, inputLength, URLEncodedEntitiesForUnencodables);
768f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        output->Append(encoded.data(), static_cast<int>(encoded.length()));
778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianprivate:
808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    const TextEncoding* m_encoding;
818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian};
828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Note that this function must be named differently than the one in KURL.cpp
848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// since our unit tests evilly include both files, and their local definition
858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// will be ambiguous.
868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianstatic inline void assertProtocolIsGood(const char* protocol)
878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
888f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#ifndef NDEBUG
898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    const char* p = protocol;
908f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    while (*p) {
918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z'));
928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        ++p;
938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#endif
958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Returns the characters for the given string, or a pointer to a static empty
982fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// string if the input string is null. This will always ensure we have a non-
992fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// null character pointer since ReplaceComponents has special meaning for null.
1008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianstatic inline const url_parse::UTF16Char* CharactersOrEmpty(const String& str)
1018f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
1028f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    static const url_parse::UTF16Char zero = 0;
1038f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return str.characters() ?
1048f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian           reinterpret_cast<const url_parse::UTF16Char*>(str.characters()) :
1058f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian           &zero;
1068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
1078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1088f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianstatic inline bool isUnicodeEncoding(const TextEncoding* encoding)
1098f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
1108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return encoding->encodingForFormSubmission() == UTF8Encoding();
1118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
1128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianstatic bool lowerCaseEqualsASCII(const char* begin, const char* end, const char* str)
1148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
1158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    while (begin != end && *str) {
116cac0f67c402d107cdb10971b95719e2ff9c7c76bSteve Block        ASSERT(toASCIILower(*str) == *str);
1178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        if (toASCIILower(*begin++) != *str++)
1188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            return false;
1198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
1208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1218f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Both strings are equal (ignoring case) if and only if all of the characters were equal,
1228f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // and the end of both has been reached.
1238f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return begin == end && !*str;
1248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
1258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
// Returns true if |c| may start a URL scheme. RFC 3986 section 3.1 requires
// the first character to be an ASCII letter.
static inline bool isSchemeFirstChar(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// Returns true if |c| may appear after the first character of a URL scheme.
// RFC 3986 section 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ).
// Note that the permitted punctuation includes '+', not '*'.
static inline bool isSchemeChar(char c)
{
    return isSchemeFirstChar(c) || (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.';
}
135643ca7872b450ea4efacab6188849e5aac2ba161Steve Block
1362bde8e466a4451c7319e3a072d118917957d6554Steve Blockbool isValidProtocol(const String& protocol)
1372bde8e466a4451c7319e3a072d118917957d6554Steve Block{
1382bde8e466a4451c7319e3a072d118917957d6554Steve Block    // NOTE This is a copy of the function in KURL.cpp.
1392bde8e466a4451c7319e3a072d118917957d6554Steve Block    // RFC3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
1402bde8e466a4451c7319e3a072d118917957d6554Steve Block    if (protocol.isEmpty())
1412bde8e466a4451c7319e3a072d118917957d6554Steve Block        return false;
1422bde8e466a4451c7319e3a072d118917957d6554Steve Block    if (!isSchemeFirstChar(protocol[0]))
1432bde8e466a4451c7319e3a072d118917957d6554Steve Block        return false;
1442bde8e466a4451c7319e3a072d118917957d6554Steve Block    unsigned protocolLength = protocol.length();
1452bde8e466a4451c7319e3a072d118917957d6554Steve Block    for (unsigned i = 1; i < protocolLength; i++) {
1462bde8e466a4451c7319e3a072d118917957d6554Steve Block        if (!isSchemeChar(protocol[i]))
1472bde8e466a4451c7319e3a072d118917957d6554Steve Block            return false;
1482bde8e466a4451c7319e3a072d118917957d6554Steve Block    }
1492bde8e466a4451c7319e3a072d118917957d6554Steve Block    return true;
1502bde8e466a4451c7319e3a072d118917957d6554Steve Block}
1512bde8e466a4451c7319e3a072d118917957d6554Steve Block
1528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1538f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// KURLGooglePrivate -----------------------------------------------------------
1548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1558f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianKURLGooglePrivate::KURLGooglePrivate()
1568f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    : m_isValid(false)
1578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    , m_protocolInHTTPFamily(false)
1588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    , m_utf8IsASCII(true)
1598f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    , m_stringIsValid(false)
1608f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
1618f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
1628f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1638f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianKURLGooglePrivate::KURLGooglePrivate(const url_parse::Parsed& parsed, bool isValid)
1648f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    : m_isValid(isValid)
1658f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    , m_protocolInHTTPFamily(false)
1668f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    , m_parsed(parsed)
1678f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    , m_utf8IsASCII(true)
1688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    , m_stringIsValid(false)
1698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
1708f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
1718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1722daae5fd11344eaa88a0d92b0f6d65f8d2255c00Ben MurdochKURLGooglePrivate::KURLGooglePrivate(WTF::HashTableDeletedValueType)
1732daae5fd11344eaa88a0d92b0f6d65f8d2255c00Ben Murdoch    : m_string(WTF::HashTableDeletedValue)
1742daae5fd11344eaa88a0d92b0f6d65f8d2255c00Ben Murdoch{
1752daae5fd11344eaa88a0d92b0f6d65f8d2255c00Ben Murdoch}
1762daae5fd11344eaa88a0d92b0f6d65f8d2255c00Ben Murdoch
1778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Setters for the data. Using the ASCII version when you know the
1788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// data is ASCII will be slightly more efficient. The UTF-8 version
1798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// will always be correct if the caller is unsure.
1808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURLGooglePrivate::setUtf8(const CString& str)
1818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
1828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    const char* data = str.data();
1838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    unsigned dataLength = str.length();
1848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // The m_utf8IsASCII must always be correct since the DeprecatedString
1868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // getter must create it with the proper constructor. This test can be
1878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // removed when DeprecatedString is gone, but it still might be a
1888f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // performance win.
1898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_utf8IsASCII = true;
1908f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    for (unsigned i = 0; i < dataLength; i++) {
1918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        if (static_cast<unsigned char>(data[i]) >= 0x80) {
1928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            m_utf8IsASCII = false;
1938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            break;
1948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        }
1958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
1968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
1978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_utf8 = str;
1988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_stringIsValid = false;
1998f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    initProtocolInHTTPFamily();
2008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
2018f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2028f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURLGooglePrivate::setAscii(const CString& str)
2038f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
2048f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_utf8 = str;
2058f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_utf8IsASCII = true;
2068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_stringIsValid = false;
2078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    initProtocolInHTTPFamily();
2088f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
2098f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURLGooglePrivate::init(const KURL& base,
2118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                             const String& relative,
2128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                             const TextEncoding* queryEncoding)
2138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
2148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    init(base, relative.characters(), relative.length(), queryEncoding);
2158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
2168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2172fc2651226baac27029e38c9d6ef883fa32084dbSteve Blocktemplate <typename CHAR>
2182fc2651226baac27029e38c9d6ef883fa32084dbSteve Blockvoid KURLGooglePrivate::init(const KURL& base, const CHAR* rel, int relLength,
2198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                             const TextEncoding* queryEncoding)
2208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
2212fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // As a performance optimization, we do not use the charset converter
2222fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // if encoding is UTF-8 or other Unicode encodings. Note that this is
2232fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more
2242fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // efficient with no charset converter object because it can do UTF-8
2252fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // internally with no extra copies.
2268f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2278f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // We feel free to make the charset converter object every time since it's
2288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // just a wrapper around a reference.
2298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLCharsetConverter charsetConverterObject(queryEncoding);
2308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLCharsetConverter* charsetConverter =
2318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :
2328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        &charsetConverterObject;
2338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_canon::RawCanonOutputT<char> output;
2358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    const CString& baseStr = base.m_url.utf8String();
2368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),
2378f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                                          base.m_url.m_parsed, rel, relLength,
2388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                                          charsetConverter,
2398f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                                          &output, &m_parsed);
2408f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2418f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // See FIXME in KURLGooglePrivate in the header. If canonicalization has not
2428f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // changed the string, we can avoid an extra allocation by using assignment.
2438f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    //
2448f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // When KURL encounters an error such that the URL is invalid and empty
2458f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // (for example, resolving a relative URL on a non-hierarchical base), it
2468f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // will produce an isNull URL, and calling setUtf8 will produce an empty
2478f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // non-null URL. This is unlikely to affect anything, but we preserve this
2488f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // just in case.
2498f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (m_isValid || output.length()) {
2508f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // Without ref, the whole url is guaranteed to be ASCII-only.
2518f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        if (m_parsed.ref.is_nonempty())
2528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            setUtf8(CString(output.data(), output.length()));
2538f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        else
2548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            setAscii(CString(output.data(), output.length()));
2558f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    } else {
2562fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        // WebCore expects resolved URLs to be empty rather than null.
2578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        setUtf8(CString("", 0));
2588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
2598f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
2608f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2618f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURLGooglePrivate::initProtocolInHTTPFamily()
2628f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
2638f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_isValid) {
2648f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_protocolInHTTPFamily = false;
2658f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return;
2668f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
2678f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    const char* scheme = m_utf8.data() + m_parsed.scheme.begin;
2698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (m_parsed.scheme.len == 4)
2708f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 4, "http");
2718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    else if (m_parsed.scheme.len == 5)
2728f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_protocolInHTTPFamily = lowerCaseEqualsASCII(scheme, scheme + 5, "https");
2738f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    else
2748f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_protocolInHTTPFamily = false;
2758f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
2768f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURLGooglePrivate::copyTo(KURLGooglePrivate* dest) const
2788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
2798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    dest->m_isValid = m_isValid;
2808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    dest->m_protocolInHTTPFamily = m_protocolInHTTPFamily;
2818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    dest->m_parsed = m_parsed;
2828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Don't copy the 16-bit string since that will be regenerated as needed.
2848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    dest->m_utf8 = CString(m_utf8.data(), m_utf8.length());
2858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    dest->m_utf8IsASCII = m_utf8IsASCII;
2868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    dest->m_stringIsValid = false;
2878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
2888f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
2898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURLGooglePrivate::componentString(const url_parse::Component& comp) const
2908f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
2918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_isValid || comp.len <= 0) {
2922fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        // KURL returns a null string if the URL is itself a null string, and an
2932fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        // empty string for other nonexistent entities.
2948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        if (utf8String().isNull())
2958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            return String();
2968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return String("", 0);
2978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
2988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // begin and len are in terms of bytes which do not match
2998f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // if string() is UTF-16 and input contains non-ASCII characters.
3008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // However, the only part in urlString that can contain non-ASCII
3018f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // characters is 'ref' at the end of the string. In that case,
3028f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // begin will always match the actual value and len (in terms of
3038f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // byte) will be longer than what's needed by 'mid'. However, mid
3048f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // truncates len to avoid go past the end of a string so that we can
3058f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // get away withtout doing anything here.
3068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return string().substring(comp.begin, comp.len);
3078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
3088f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3098f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURLGooglePrivate::replaceComponents(const Replacements& replacements)
3108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
3118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_canon::RawCanonOutputT<char> output;
3128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_parse::Parsed newParsed;
3138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_isValid = url_util::ReplaceComponents(utf8String().data(),
3158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                                            utf8String().length(), m_parsed, replacements, 0, &output, &newParsed);
3168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_parsed = newParsed;
3188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (m_parsed.ref.is_nonempty())
3198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        setUtf8(CString(output.data(), output.length()));
3208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    else
3218f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        setAscii(CString(output.data(), output.length()));
3228f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
3238f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianconst String& KURLGooglePrivate::string() const
3258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
3268f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_stringIsValid) {
3272fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        // Handle the null case separately. Otherwise, constructing
3282fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        // the string like we do below would generate the empty string,
3292fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        // not the null string.
3308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        if (m_utf8.isNull())
3318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            m_string = String();
3328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        else if (m_utf8IsASCII)
3338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            m_string = String(m_utf8.data(), m_utf8.length());
3348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        else
3358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            m_string = String::fromUTF8(m_utf8.data(), m_utf8.length());
3368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_stringIsValid = true;
3378f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
3388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_string;
3398f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
3408f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3418f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// KURL ------------------------------------------------------------------------
3428f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3432fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// Creates with null-terminated string input representing an absolute URL.
3448f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// WebCore generally calls this only with hardcoded strings, so the input is
3452fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// ASCII. We treat it as UTF-8 just in case.
346231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve BlockKURL::KURL(ParsedURLStringTag, const char *url)
3478f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
3482fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // FIXME The Mac code checks for beginning with a slash and converts it to
3498f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // file: URL. We will want to add this as well once we can compile on a
3508f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // system like that.
3518f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.init(KURL(), url, strlen(url), 0);
3528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3532fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // The one-argument constructors should never generate a null string.
3548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // This is a funny quirk of KURL.cpp (probably a bug) which we preserve.
3558f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (m_url.utf8String().isNull())
3568f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.setAscii(CString("", 0));
3578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
3588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3598f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Initializes with a string representing an absolute URL. No encoding
3608f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// information is specified. This generally happens when a KURL is converted
3618f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// to a string and then converted back. In this case, the URL is already
3622fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// canonical and in proper escaped form so needs no encoding. We treat it as
3638f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// UTF-8 just in case.
364231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve BlockKURL::KURL(ParsedURLStringTag, const String& url)
3658f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
3668f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!url.isNull())
3678f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.init(KURL(), url, 0);
3688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    else {
3698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // WebCore expects us to preserve the nullness of strings when this
3708f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // constructor is used. In all other cases, it expects a non-null
3718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // empty string, which is what init() will create.
3728f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.m_isValid = false;
3738f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.m_protocolInHTTPFamily = false;
3748f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
3758f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
3768f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Constructs a new URL given a base URL and a possibly relative input URL.
3788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// This assumes UTF-8 encoding.
3798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianKURL::KURL(const KURL& base, const String& relative)
3808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
3818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.init(base, relative, 0);
3828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
3838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Constructs a new URL given a base URL and a possibly relative input URL.
3858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Any query portion of the relative URL will be encoded in the given encoding.
3868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianKURL::KURL(const KURL& base,
3878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian           const String& relative,
3888f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian           const TextEncoding& encoding)
3898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
3908f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.init(base, relative, &encoding.encodingForFormSubmission());
3918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
3928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
3938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianKURL::KURL(const CString& canonicalSpec,
3948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian           const url_parse::Parsed& parsed, bool isValid)
3958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    : m_url(parsed, isValid)
3968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
3978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // We know the reference fragment is the only part that can be UTF-8, so
3988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // we know it's ASCII when there is no ref.
3998f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (parsed.ref.is_nonempty())
4008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.setUtf8(canonicalSpec);
4018f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    else
4028f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.setAscii(canonicalSpec);
4038f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4048f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
40581bc750723a18f21cd17d1b173cd2a4dda9cea6eBen Murdoch#if USE(CF)
4068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianKURL::KURL(CFURLRef)
4078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4088f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    notImplemented();
4098f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    invalidate();
4108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianCFURLRef KURL::createCFURL() const
4138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    notImplemented();
4158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return 0;
4168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#endif
4188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianKURL KURL::copy() const
4208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4218f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURL result = *this;
4228f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.copyTo(&result.m_url);
4238f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return result;
4248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4268f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool KURL::isNull() const
4278f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.utf8String().isNull();
4298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool KURL::isEmpty() const
4328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return !m_url.utf8String().length();
4348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool KURL::isValid() const
4378f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_isValid;
4398f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4408f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
441643ca7872b450ea4efacab6188849e5aac2ba161Steve Blockbool KURL::hasPort() const
442643ca7872b450ea4efacab6188849e5aac2ba161Steve Block{
443643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    return hostEnd() < pathStart();
444643ca7872b450ea4efacab6188849e5aac2ba161Steve Block}
445643ca7872b450ea4efacab6188849e5aac2ba161Steve Block
4468f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool KURL::protocolInHTTPFamily() const
4478f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4488f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_protocolInHTTPFamily;
4498f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4508f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4518f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool KURL::hasPath() const
4528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4538f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Note that http://www.google.com/" has a path, the path is "/". This can
4548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // return false only for invalid or nonstandard URLs.
4558f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_parsed.path.len >= 0;
4568f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// We handle "parameters" separated by a semicolon, while KURL.cpp does not,
4598f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// which can lead to different results in some cases.
4608f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURL::lastPathComponent() const
4618f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4628f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // When the output ends in a slash, WebCore has different expectations than
4638f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // the GoogleURL library. For "/foo/bar/" the library will return the empty
4648f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // string, but WebCore wants "bar".
4658f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_parse::Component path = m_url.m_parsed.path;
4668f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (path.len > 0 && m_url.utf8String().data()[path.end() - 1] == '/')
4678f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        path.len--;
4688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_parse::Component file;
4708f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_parse::ExtractFileName(m_url.utf8String().data(), path, &file);
4718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4728f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
4738f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // a null string when the path is empty, which we duplicate here.
4748f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!file.is_nonempty())
4758f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return String();
4768f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.componentString(file);
4778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURL::protocol() const
4808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.componentString(m_url.m_parsed.scheme);
4828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
4848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURL::host() const
4858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Note: KURL.cpp unescapes here.
4878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.componentString(m_url.m_parsed.host);
4888f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
4898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
490dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block// Returns 0 when there is no port.
4918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian//
4928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// We treat URL's with out-of-range port numbers as invalid URLs, and they will
4938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// be rejected by the canonicalizer. KURL.cpp will allow them in parsing, but
494dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block// return invalidPortNumber from this port() function, so we mirror that behavior here.
4958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianunsigned short KURL::port() const
4968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
4978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_url.m_isValid || m_url.m_parsed.port.len <= 0)
4988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return 0;
4998f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    int port = url_parse::ParsePort(m_url.utf8String().data(), m_url.m_parsed.port);
500dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    ASSERT(port != url_parse::PORT_UNSPECIFIED); // Checked port.len <= 0 before.
501dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block
502dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    if (port == url_parse::PORT_INVALID || port > maximumValidPortNumber) // Mimic KURL::port()
503dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block        port = invalidPortNumber;
504dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block
5058f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return static_cast<unsigned short>(port);
5068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
5078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5088f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Returns the empty string if there is no password.
5098f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURL::pass() const
5108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
5118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
5128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // a null string when the password is empty, which we duplicate here.
5138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_url.m_parsed.password.is_nonempty())
5148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return String();
5158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Note: KURL.cpp unescapes here.
5178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.componentString(m_url.m_parsed.password);
5188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
5198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Returns the empty string if there is no username.
5218f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURL::user() const
5228f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
5238f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Note: KURL.cpp unescapes here.
5248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.componentString(m_url.m_parsed.username);
5258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
5268f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5270bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben MurdochString KURL::fragmentIdentifier() const
5288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
5298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Empty but present refs ("foo.com/bar#") should result in the empty
5302fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // string, which m_url.componentString will produce. Nonexistent refs
5312fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    // should be the null string.
5328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_url.m_parsed.ref.is_valid())
5338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return String();
5348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Note: KURL.cpp unescapes here.
5368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.componentString(m_url.m_parsed.ref);
5378f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
5388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5390bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdochbool KURL::hasFragmentIdentifier() const
5408f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
5418f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Note: KURL.cpp unescapes here.
5428f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // FIXME determine if KURL.cpp agrees about an empty ref
5438f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_parsed.ref.len >= 0;
5448f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
5458f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
546a94275402997c11dd2e778633dacf4b7e630a35dBen Murdochvoid KURL::copyParsedQueryTo(ParsedURLParameters& parameters) const
547a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch{
548a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch    String query = m_url.componentString(m_url.m_parsed.query);
549a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch    const UChar* pos = query.characters();
550a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch    const UChar* end = query.characters() + query.length();
551a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch    while (pos < end) {
552a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        const UChar* parameterStart = pos;
553a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        while (pos < end && *pos != '&')
554a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch            ++pos;
555a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        const UChar* parameterEnd = pos;
556a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        if (pos < end) {
557a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch            ASSERT(*pos == '&');
558a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch            ++pos;
559a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        }
560a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        if (parameterStart == parameterEnd)
561a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch            continue;
562a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        const UChar* nameStart = parameterStart;
563a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        const UChar* equalSign = parameterStart;
564a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        while (equalSign < parameterEnd && *equalSign != '=')
565a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch            ++equalSign;
566a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        if (equalSign == nameStart)
567a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch            continue;
568a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        String name(nameStart, equalSign - nameStart);
569a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        String value = equalSign == parameterEnd ? String() : String(equalSign + 1, parameterEnd - equalSign - 1);
570a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        parameters.set(name, value);
571a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch    }
572a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch}
573a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch
5745f1ab04193ad0130ca8204aadaceae083aca9881Feng QianString KURL::baseAsString() const
5755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian{
5765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    // FIXME: There is probably a more efficient way to do this?
5775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    return string().left(pathAfterLastSlash());
5785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian}
5795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURL::query() const
5818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
5828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (m_url.m_parsed.query.len >= 0)
5838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return m_url.componentString(m_url.m_parsed.query);
5848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
5868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // an empty string when the query is empty rather than a null (not sure
5878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // which is right).
588643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    // Returns a null if the query is not specified, instead of empty.
589643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    if (m_url.m_parsed.query.is_valid())
590643ca7872b450ea4efacab6188849e5aac2ba161Steve Block        return String("", 0);
591643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    return String();
5928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
5938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
5948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString KURL::path() const
5958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
5968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.componentString(m_url.m_parsed.path);
5978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
5988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
599d0825bca7fe65beaee391d30da42e937db621564Steve Blockbool KURL::setProtocol(const String& protocol)
6008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
601dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // Firefox and IE remove everything after the first ':'.
602dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    int separatorPosition = protocol.find(':');
603dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    String newProtocol = protocol.substring(0, separatorPosition);
604dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block
605dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // If KURL is given an invalid scheme, it returns failure without modifying
606dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // the URL at all. This is in contrast to most other setters which modify
607dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // the URL and set "m_isValid."
608dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    url_canon::RawCanonOutputT<char> canonProtocol;
609dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    url_parse::Component protocolComponent;
610dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    if (!url_canon::CanonicalizeScheme(newProtocol.characters(),
611dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block                                       url_parse::Component(0, newProtocol.length()),
612dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block                                       &canonProtocol, &protocolComponent)
613dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block        || !protocolComponent.is_nonempty())
614dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block        return false;
615dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block
6168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
617dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    replacements.SetScheme(CharactersOrEmpty(newProtocol),
618dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block                           url_parse::Component(0, newProtocol.length()));
6198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
620dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block
621dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // isValid could be false but we still return true here. This is because
622dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // WebCore or JS scripts can build up a URL by setting individual
623dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // components, and a JS exception is based on the return value of this
624dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // function. We want to throw the exception and stop the script only when
625dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // its trying to set a bad protocol, and not when it maybe just hasn't
626dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // finished building up its final scheme.
627d0825bca7fe65beaee391d30da42e937db621564Steve Block    return true;
6288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
6298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
6308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::setHost(const String& host)
6318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
6328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
6338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    replacements.SetHost(CharactersOrEmpty(host),
6348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                         url_parse::Component(0, host.length()));
6358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
6368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
6378f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
6388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::setHostAndPort(const String& s)
6398f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
640643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    String host = s;
641643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    String port;
642643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    int hostEnd = s.find(":");
643643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    if (hostEnd != -1) {
644643ca7872b450ea4efacab6188849e5aac2ba161Steve Block        host = s.left(hostEnd);
645643ca7872b450ea4efacab6188849e5aac2ba161Steve Block        port = s.substring(hostEnd + 1);
646643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    }
6478f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
6488f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
6498f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Host can't be removed, so we always set.
650643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    replacements.SetHost(CharactersOrEmpty(host),
651643ca7872b450ea4efacab6188849e5aac2ba161Steve Block                         url_parse::Component(0, host.length()));
6528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
653643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    if (port.isEmpty())  // Port may be removed, so we support clearing.
6548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        replacements.ClearPort();
6558f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    else
656643ca7872b450ea4efacab6188849e5aac2ba161Steve Block        replacements.SetPort(CharactersOrEmpty(port), url_parse::Component(0, port.length()));
6578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
6588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
6598f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
660643ca7872b450ea4efacab6188849e5aac2ba161Steve Blockvoid KURL::removePort()
661643ca7872b450ea4efacab6188849e5aac2ba161Steve Block{
662643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    if (hasPort()) {
663643ca7872b450ea4efacab6188849e5aac2ba161Steve Block        String urlWithoutPort = m_url.string().left(hostEnd()) + m_url.string().substring(pathStart());
664643ca7872b450ea4efacab6188849e5aac2ba161Steve Block        m_url.setUtf8(urlWithoutPort.utf8());
665643ca7872b450ea4efacab6188849e5aac2ba161Steve Block    }
666643ca7872b450ea4efacab6188849e5aac2ba161Steve Block}
667643ca7872b450ea4efacab6188849e5aac2ba161Steve Block
6688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::setPort(unsigned short i)
6698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
6708f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
6718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    String portStr;
6728f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
673c57e584da242d96ba18578a71f4634adc9d1fff6Steve Block    portStr = String::number(i);
674c57e584da242d96ba18578a71f4634adc9d1fff6Steve Block    replacements.SetPort(
675c57e584da242d96ba18578a71f4634adc9d1fff6Steve Block        reinterpret_cast<const url_parse::UTF16Char*>(portStr.characters()),
676c57e584da242d96ba18578a71f4634adc9d1fff6Steve Block        url_parse::Component(0, portStr.length()));
677c57e584da242d96ba18578a71f4634adc9d1fff6Steve Block
6788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
6798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
6808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
6818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::setUser(const String& user)
6828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
6838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // This function is commonly called to clear the username, which we
6848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // normally don't have, so we optimize this case.
6858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (user.isEmpty() && !m_url.m_parsed.username.is_valid())
6868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return;
6878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
6888f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // The canonicalizer will clear any usernames that are empty, so we
6898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // don't have to explicitly call ClearUsername() here.
6908f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
6918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    replacements.SetUsername(CharactersOrEmpty(user),
6928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                             url_parse::Component(0, user.length()));
6938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
6948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
6958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
6968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::setPass(const String& pass)
6978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
6988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // This function is commonly called to clear the password, which we
6998f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // normally don't have, so we optimize this case.
7008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (pass.isEmpty() && !m_url.m_parsed.password.is_valid())
7018f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return;
7028f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7038f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // The canonicalizer will clear any passwords that are empty, so we
7048f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // don't have to explicitly call ClearUsername() here.
7058f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
7068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    replacements.SetPassword(CharactersOrEmpty(pass),
7078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                             url_parse::Component(0, pass.length()));
7088f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
7098f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
7108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7110bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdochvoid KURL::setFragmentIdentifier(const String& s)
7128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
7138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // This function is commonly called to clear the ref, which we
7148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // normally don't have, so we optimize this case.
7150bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    if (s.isNull() && !m_url.m_parsed.ref.is_valid())
7168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return;
7178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
7190bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    if (s.isNull())
7208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        replacements.ClearRef();
7218f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    else
7220bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch        replacements.SetRef(CharactersOrEmpty(s), url_parse::Component(0, s.length()));
7238f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
7248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
7258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7260bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdochvoid KURL::removeFragmentIdentifier()
7278f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
7288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
7298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    replacements.ClearRef();
7308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
7318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
7328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::setQuery(const String& query)
7348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
7358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
7368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (query.isNull()) {
7372fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        // KURL.cpp sets to null to clear any query.
7388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        replacements.ClearQuery();
7398f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    } else if (query.length() > 0 && query[0] == '?') {
7408f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // WebCore expects the query string to begin with a question mark, but
7418f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // GoogleURL doesn't. So we trim off the question mark when setting.
7428f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        replacements.SetQuery(CharactersOrEmpty(query),
7438f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                              url_parse::Component(1, query.length() - 1));
7448f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    } else {
7458f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // When set with the empty string or something that doesn't begin with
7468f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // a question mark, KURL.cpp will add a question mark for you. The only
7478f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // way this isn't compatible is if you call this function with an empty
7488f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // string. KURL.cpp will leave a '?' with nothing following it in the
7498f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // URL, whereas we'll clear it.
7508f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        // FIXME We should eliminate this difference.
7518f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        replacements.SetQuery(CharactersOrEmpty(query),
7528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                              url_parse::Component(0, query.length()));
7538f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
7548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
7558f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
7568f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::setPath(const String& path)
7588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
7598f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Empty paths will be canonicalized to "/", so we don't have to worry
7608f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // about calling ClearPath().
7618f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    KURLGooglePrivate::Replacements replacements;
7628f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    replacements.SetPath(CharactersOrEmpty(path),
7638f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                         url_parse::Component(0, path.length()));
7648f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.replaceComponents(replacements);
7658f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
7668f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7678f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// On Mac, this just seems to return the same URL, but with "/foo/bar" for
7688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// file: URLs instead of file:///foo/bar. We don't bother with any of this,
7698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// at least for now.
7706ed1fdfa7999878a811b09cdd647fbeace4353b8Steve BlockString KURL::deprecatedString() const
7718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
7728f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_url.m_isValid)
7738f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return String();
7748f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.string();
7758f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
7768f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString decodeURLEscapeSequences(const String& str)
7788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
7798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return decodeURLEscapeSequences(str, UTF8Encoding());
7808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
7818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
7828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// In KURL.cpp's implementation, this is called by every component getter.
7832fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// It will unescape every character, including '\0'. This is scary, and may
7848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// cause security holes. We never call this function for components, and
7858f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// just return the ASCII versions instead.
7868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian//
787231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// This function is also used to decode javascript: URLs and as a general
788231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// purpose unescaping function.
7898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian//
7908f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// FIXME These should be merged to the KURL.cpp implementation.
7918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString decodeURLEscapeSequences(const String& str, const TextEncoding& encoding)
7928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
7938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // FIXME We can probably use KURL.cpp's version of this function
7948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // without modification. However, I'm concerned about
7958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // https://bugs.webkit.org/show_bug.cgi?id=20559 so am keeping this old
7968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // custom code for now. Using their version will also fix the bug that
7978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // we ignore the encoding.
7988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    //
7998f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // FIXME b/1350291: This does not get called very often. We just convert
8008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // first to 8-bit UTF-8, then unescape, then back to 16-bit. This kind of
8018f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // sucks, and we don't use the encoding properly, which will make some
8028f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // obscure anchor navigations fail.
8038f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    CString cstr = str.utf8();
8048f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8058f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    const char* input = cstr.data();
8068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    int inputLength = cstr.length();
8078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
808967717af5423377c967781471ee106e2bb4e11c8Ben Murdoch    url_canon::RawCanonOutputT<url_parse::UTF16Char> unescaped;
809967717af5423377c967781471ee106e2bb4e11c8Ben Murdoch
810967717af5423377c967781471ee106e2bb4e11c8Ben Murdoch    url_util::DecodeURLEscapeSequences(input, inputLength, &unescaped);
8118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
812967717af5423377c967781471ee106e2bb4e11c8Ben Murdoch    return String(reinterpret_cast<UChar*>(unescaped.data()),
813967717af5423377c967781471ee106e2bb4e11c8Ben Murdoch                  unescaped.length());
8148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
8158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool KURL::protocolIs(const char* protocol) const
8178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
8188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    assertProtocolIsGood(protocol);
8195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
8205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    // JavaScript URLs are "valid" and should be executed even if KURL decides they are invalid.
8215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    // The free function protocolIsJavaScript() should be used instead.
8225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    // FIXME: Chromium code needs to be fixed for this assert to be enabled. ASSERT(strcmp(protocol, "javascript"));
8235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
8248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (m_url.m_parsed.scheme.len <= 0)
8258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return !protocol;
8268f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return lowerCaseEqualsASCII(
8278f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.utf8String().data() + m_url.m_parsed.scheme.begin,
8288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.utf8String().data() + m_url.m_parsed.scheme.end(),
8298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        protocol);
8308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
8318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// This is called to escape a URL string. It is only used externally when
8338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// constructing mailto: links to set the query section. Since our query setter
8348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// will automatically do the correct escaping, this function does not have to
8358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// do any work.
8368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian//
8372fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// There is a possibility that a future caller may use this function in other
8388f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// ways, and may expect to get a valid URL string. The dangerous thing we want
8392fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// to protect against here is accidentally getting '\0' characters in a string
8402fc2651226baac27029e38c9d6ef883fa32084dbSteve Block// that is not supposed to have them. Therefore, we escape these characters.
8418f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng QianString encodeWithURLEscapeSequences(const String& notEncodedString)
8428f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
8438f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    CString utf8 = UTF8Encoding().encode(
8448f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        reinterpret_cast<const UChar*>(notEncodedString.characters()),
8458f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        notEncodedString.length(),
8468f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        URLEncodedEntitiesForUnencodables);
8478f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    const char* input = utf8.data();
8488f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    int inputLength = utf8.length();
8498f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8508f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    Vector<char, 2048> buffer;
8518f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    for (int i = 0; i < inputLength; i++) {
8528f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        if (!input[i])
8538f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            buffer.append("%00", 3);
8548f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        else
8558f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian            buffer.append(input[i]);
8568f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    }
8578f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return String(buffer.data(), buffer.size());
8588f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
8598f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8608f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool KURL::isHierarchical() const
8618f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
8628f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_url.m_parsed.scheme.is_nonempty())
8638f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return false;
8648f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return url_util::IsStandard(
8658f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        &m_url.utf8String().data()[m_url.m_parsed.scheme.begin],
8668f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        m_url.m_parsed.scheme);
8678f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
8688f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8698f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#ifndef NDEBUG
8708f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::print() const
8718f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
8728f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    printf("%s\n", m_url.utf8String().data());
8738f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
8748f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#endif
8758f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8768f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianvoid KURL::invalidate()
8778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
8788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // This is only called from the constructor so resetting the (automatically
8798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // initialized) string and parsed structure would be a waste of time.
8808f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.m_isValid = false;
8818f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    m_url.m_protocolInHTTPFamily = false;
8828f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
8838f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8848f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian// Equal up to reference fragments, if any.
8850bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdochbool equalIgnoringFragmentIdentifier(const KURL& a, const KURL& b)
8868f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
8878f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Compute the length of each URL without its ref. Note that the reference
8888f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // begin (if it exists) points to the character *after* the '#', so we need
8898f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // to subtract one.
8908f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    int aLength = a.m_url.utf8String().length();
8918f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (a.m_url.m_parsed.ref.len >= 0)
8928f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        aLength = a.m_url.m_parsed.ref.begin - 1;
8938f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8948f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    int bLength = b.m_url.utf8String().length();
8958f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (b.m_url.m_parsed.ref.len >= 0)
8968f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        bLength = b.m_url.m_parsed.ref.begin - 1;
8978f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
8988f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return aLength == bLength
8998f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        && !strncmp(a.m_url.utf8String().data(), b.m_url.utf8String().data(), aLength);
9008f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9018f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9028f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianunsigned KURL::hostStart() const
9038f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
9048f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::HOST, false);
9058f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9068f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9078f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianunsigned KURL::hostEnd() const
9088f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
9098f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PORT, true);
9108f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9118f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9128f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianunsigned KURL::pathStart() const
9138f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
9148f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false);
9158f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9168f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9178f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianunsigned KURL::pathEnd() const
9188f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
9198f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::QUERY, true);
9208f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9218f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9228f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianunsigned KURL::pathAfterLastSlash() const
9238f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
9248f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // When there's no path, ask for what would be the beginning of it.
9258f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    if (!m_url.m_parsed.path.is_valid())
9268f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian        return m_url.m_parsed.CountCharactersBefore(url_parse::Parsed::PATH, false);
9278f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9288f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_parse::Component filename;
9298f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    url_parse::ExtractFileName(m_url.utf8String().data(), m_url.m_parsed.path,
9308f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian                               &filename);
9318f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return filename.begin;
9328f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9338f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9348f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianbool protocolIs(const String& url, const char* protocol)
9358f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
9368f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    // Do the comparison without making a new string object.
9378f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    assertProtocolIsGood(protocol);
938dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block
939dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    // Check the scheme like GURL does.
940dcc8cf2e65d1aa555cce12431a16547e66b469eeSteve Block    return url_util::FindAndCompareScheme(url.characters(), url.length(),
9412fc2651226baac27029e38c9d6ef883fa32084dbSteve Block        protocol, 0);
9428f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9438f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9448f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qianinline bool KURL::protocolIs(const String& string, const char* protocol)
9458f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian{
9468f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian    return WebCore::protocolIs(string, protocol);
9478f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian}
9488f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9490bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdochbool protocolHostAndPortAreEqual(const KURL& a, const KURL& b)
9500bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch{
9510bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    if (a.parsed().scheme.end() != b.parsed().scheme.end())
9520bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch        return false;
9530bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch
9540bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    int hostStartA = a.hostStart();
95521939df44de1705786c545cd1bf519d47250322dBen Murdoch    int hostLengthA = a.hostEnd() - hostStartA;
9560bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    int hostStartB = b.hostStart();
95721939df44de1705786c545cd1bf519d47250322dBen Murdoch    int hostLengthB = b.hostEnd() - b.hostStart();
95821939df44de1705786c545cd1bf519d47250322dBen Murdoch    if (hostLengthA != hostLengthB)
9590bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch        return false;
9600bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch
9610bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    // Check the scheme
9620bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    for (int i = 0; i < a.parsed().scheme.end(); ++i)
9630bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch        if (a.string()[i] != b.string()[i])
9640bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch            return false;
9650bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch
9660bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    // And the host
96721939df44de1705786c545cd1bf519d47250322dBen Murdoch    for (int i = 0; i < hostLengthA; ++i)
96821939df44de1705786c545cd1bf519d47250322dBen Murdoch        if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
9690bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch            return false;
9700bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch
9710bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    if (a.port() != b.port())
9720bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch        return false;
9730bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch
9740bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch    return true;
9750bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch}
9760bf48ef3be53ddaa52bbead65dfd75bf90e7a2b5Ben Murdoch
9778f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian} // namespace WebCore
9788f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian
9798f72e70a9fd78eec56623b3a62e68f16b7b27e28Feng Qian#endif // USE(GOOGLEURL)
980