1/*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4 *           (C) 2001 Dirk Mueller (mueller@kde.org)
5 *           (C) 2006 Alexey Proskuryakov (ap@webkit.org)
6 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB.  If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 */
23
24#include "config.h"
25#include "KURL.h"
26#include "LinkHash.h"
27#include "PlatformString.h"
28#include <wtf/text/AtomicString.h>
29#include <wtf/text/StringHash.h>
30#include <wtf/text/StringImpl.h>
31
32namespace WebCore {
33
34static inline size_t findSlashDotDotSlash(const UChar* characters, size_t length, size_t position)
35{
36    if (length < 4)
37        return notFound;
38    size_t loopLimit = length - 3;
39    for (size_t i = position; i < loopLimit; ++i) {
40        if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
41            return i;
42    }
43    return notFound;
44}
45
46static inline size_t findSlashSlash(const UChar* characters, size_t length, size_t position)
47{
48    if (length < 2)
49        return notFound;
50    size_t loopLimit = length - 1;
51    for (size_t i = position; i < loopLimit; ++i) {
52        if (characters[i] == '/' && characters[i + 1] == '/')
53            return i;
54    }
55    return notFound;
56}
57
58static inline size_t findSlashDotSlash(const UChar* characters, size_t length, size_t position)
59{
60    if (length < 3)
61        return notFound;
62    size_t loopLimit = length - 2;
63    for (size_t i = position; i < loopLimit; ++i) {
64        if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
65            return i;
66    }
67    return notFound;
68}
69
70static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
71{
72    if (length < 3)
73        return false;
74    unsigned loopLimit = length - 2;
75    for (unsigned i = 0; i < loopLimit; ++i) {
76        if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/')
77            return true;
78    }
79    return false;
80}
81
82static inline void squeezeOutNullCharacters(Vector<UChar, 512>& string)
83{
84    size_t size = string.size();
85    size_t i = 0;
86    for (i = 0; i < size; ++i) {
87        if (!string[i])
88            break;
89    }
90    if (i == size)
91        return;
92    size_t j = i;
93    for (++i; i < size; ++i) {
94        if (UChar character = string[i])
95            string[j++] = character;
96    }
97    ASSERT(j < size);
98    string.shrink(j);
99}
100
101static void cleanSlashDotDotSlashes(Vector<UChar, 512>& path, size_t firstSlash)
102{
103    size_t slash = firstSlash;
104    do {
105        size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound;
106        // Don't remove the host, i.e. http://foo.org/../foo.html
107        if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) {
108            path[slash] = 0;
109            path[slash + 1] = 0;
110            path[slash + 2] = 0;
111        } else {
112            for (size_t i = previousSlash; i < slash + 3; ++i)
113                path[i] = 0;
114        }
115        slash += 3;
116    } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound);
117    squeezeOutNullCharacters(path);
118}
119
120static void mergeDoubleSlashes(Vector<UChar, 512>& path, size_t firstSlash)
121{
122    size_t refPos = find(path.data(), path.size(), '#');
123    if (!refPos || refPos == notFound)
124        refPos = path.size();
125
126    size_t slash = firstSlash;
127    while (slash < refPos) {
128        if (!slash || path[slash - 1] != ':')
129            path[slash++] = 0;
130        else
131            slash += 2;
132        if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound)
133            break;
134    }
135    squeezeOutNullCharacters(path);
136}
137
138static void cleanSlashDotSlashes(Vector<UChar, 512>& path, size_t firstSlash)
139{
140    size_t slash = firstSlash;
141    do {
142        path[slash] = 0;
143        path[slash + 1] = 0;
144        slash += 2;
145    } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound);
146    squeezeOutNullCharacters(path);
147}
148
149static inline void cleanPath(Vector<UChar, 512>& path)
150{
151    // FIXME: Should not do this in the query or anchor part of the URL.
152    size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0);
153    if (firstSlash != notFound)
154        cleanSlashDotDotSlashes(path, firstSlash);
155
156    // FIXME: Should not do this in the query part.
157    firstSlash = findSlashSlash(path.data(), path.size(), 0);
158    if (firstSlash != notFound)
159        mergeDoubleSlashes(path, firstSlash);
160
161    // FIXME: Should not do this in the query or anchor part.
162    firstSlash = findSlashDotSlash(path.data(), path.size(), 0);
163    if (firstSlash != notFound)
164        cleanSlashDotSlashes(path, firstSlash);
165}
166
167static inline bool matchLetter(UChar c, UChar lowercaseLetter)
168{
169    return (c | 0x20) == lowercaseLetter;
170}
171
172static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
173{
174    if (length < 6)
175        return false;
176    if (!matchLetter(characters[0], 'h')
177            || !matchLetter(characters[1], 't')
178            || !matchLetter(characters[2], 't')
179            || !matchLetter(characters[3], 'p'))
180        return false;
181    if (!(characters[4] == ':'
182            || (matchLetter(characters[4], 's') && characters[5] == ':')))
183        return false;
184
185    unsigned pos = characters[4] == ':' ? 5 : 6;
186
187    // Skip initial two slashes if present.
188    if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
189        pos += 2;
190
191    // Find next slash.
192    while (pos < length && characters[pos] != '/')
193        ++pos;
194
195    return pos == length;
196}
197
198static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length)
199{
200    return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length));
201}
202
203LinkHash visitedLinkHash(const UChar* url, unsigned length)
204{
205    return visitedLinkHashInline(url, length);
206}
207
208static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
209{
210    if (attributeURL.isNull())
211        return;
212
213    const UChar* characters = attributeURL.characters();
214    unsigned length = attributeURL.length();
215
216    // This is a poor man's completeURL. Faster with less memory allocation.
217    // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
218    // For example, it does not handle international domain names properly.
219
220    // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
221    //    1) The "://" could be in the query or anchor.
222    //    2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
223
224    // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
225    // have a query or anchor.
226
227    bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
228
229    if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
230        buffer.append(attributeURL.characters(), attributeURL.length());
231        return;
232    }
233
234
235    if (hasColonSlashSlash) {
236        // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
237        // end of the path, *before* the query or anchor.
238        buffer.append(characters, length);
239        buffer.append('/');
240        return;
241    }
242
243    if (!length)
244        buffer.append(base.string().characters(), base.string().length());
245    else {
246        switch (characters[0]) {
247            case '/':
248                buffer.append(base.string().characters(), base.pathStart());
249                break;
250            case '#':
251                buffer.append(base.string().characters(), base.pathEnd());
252                break;
253            default:
254                buffer.append(base.string().characters(), base.pathAfterLastSlash());
255                break;
256        }
257    }
258    buffer.append(characters, length);
259    cleanPath(buffer);
260    if (needsTrailingSlash(buffer.data(), buffer.size())) {
261        // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
262        // end of the path, *before* the query or anchor.
263        buffer.append('/');
264    }
265
266    return;
267}
268
269void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
270{
271    return visitedURLInline(base, attributeURL, buffer);
272}
273
274LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
275{
276    Vector<UChar, 512> url;
277    visitedURLInline(base, attributeURL, url);
278    if (url.isEmpty())
279        return 0;
280
281    return visitedLinkHashInline(url.data(), url.size());
282}
283
284}  // namespace WebCore
285