1/*
2 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1.  Redistributions of source code must retain the above copyright
12 *     notice, this list of conditions and the following disclaimer.
13 * 2.  Redistributions in binary form must reproduce the above copyright
14 *     notice, this list of conditions and the following disclaimer in the
15 *     documentation and/or other materials provided with the distribution.
16 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
17 *     its contributors may be used to endorse or promote products derived
18 *     from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
21 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
24 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include "config.h"
33#include "HTTPParsers.h"
34#include "ResourceResponseBase.h"
35
36#include "PlatformString.h"
37#include <wtf/text/CString.h>
38#include <wtf/DateMath.h>
39
40using namespace WTF;
41
42namespace WebCore {
43
44// true if there is more to parse
45static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
46{
47    unsigned len = str.length();
48
49    if (fromHttpEquivMeta) {
50        while (pos != len && str[pos] <= ' ')
51            ++pos;
52    } else {
53        while (pos != len && (str[pos] == '\t' || str[pos] == ' '))
54            ++pos;
55    }
56
57    return pos != len;
58}
59
60// Returns true if the function can match the whole token (case insensitive).
61// Note: Might return pos == str.length()
62static inline bool skipToken(const String& str, unsigned& pos, const char* token)
63{
64    unsigned len = str.length();
65
66    while (pos != len && *token) {
67        if (toASCIILower(str[pos]) != *token++)
68            return false;
69        ++pos;
70    }
71
72    return true;
73}
74
75ContentDispositionType contentDispositionType(const String& contentDisposition)
76{
77    if (contentDisposition.isEmpty())
78        return ContentDispositionNone;
79
80    // Some broken sites just send
81    // Content-Disposition: ; filename="file"
82    // screen those out here.
83    if (contentDisposition.startsWith(";"))
84        return ContentDispositionNone;
85
86    if (contentDisposition.startsWith("inline", false))
87        return ContentDispositionInline;
88
89    // Some broken sites just send
90    // Content-Disposition: filename="file"
91    // without a disposition token... screen those out.
92    if (contentDisposition.startsWith("filename", false))
93        return ContentDispositionNone;
94
95    // Also in use is Content-Disposition: name="file"
96    if (contentDisposition.startsWith("name", false))
97        return ContentDispositionNone;
98
99    // We have a content-disposition of "attachment" or unknown.
100    // RFC 2183, section 2.8 says that an unknown disposition
101    // value should be treated as "attachment"
102    return ContentDispositionAttachment;
103}
104
105bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
106{
107    unsigned len = refresh.length();
108    unsigned pos = 0;
109
110    if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
111        return false;
112
113    while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
114        ++pos;
115
116    if (pos == len) { // no URL
117        url = String();
118        bool ok;
119        delay = refresh.stripWhiteSpace().toDouble(&ok);
120        return ok;
121    } else {
122        bool ok;
123        delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
124        if (!ok)
125            return false;
126
127        ++pos;
128        skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
129        unsigned urlStartPos = pos;
130        if (refresh.find("url", urlStartPos, false) == urlStartPos) {
131            urlStartPos += 3;
132            skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
133            if (refresh[urlStartPos] == '=') {
134                ++urlStartPos;
135                skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
136            } else
137                urlStartPos = pos;  // e.g. "Refresh: 0; url.html"
138        }
139
140        unsigned urlEndPos = len;
141
142        if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
143            UChar quotationMark = refresh[urlStartPos];
144            urlStartPos++;
145            while (urlEndPos > urlStartPos) {
146                urlEndPos--;
147                if (refresh[urlEndPos] == quotationMark)
148                    break;
149            }
150
151            // https://bugs.webkit.org/show_bug.cgi?id=27868
152            // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
153            // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
154            // after the opening quote instead.
155            if (urlEndPos == urlStartPos)
156                urlEndPos = len;
157        }
158
159        url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
160        return true;
161    }
162}
163
164double parseDate(const String& value)
165{
166    return parseDateFromNullTerminatedCharacters(value.utf8().data());
167}
168
169String filenameFromHTTPContentDisposition(const String& value)
170{
171    Vector<String> keyValuePairs;
172    value.split(';', keyValuePairs);
173
174    unsigned length = keyValuePairs.size();
175    for (unsigned i = 0; i < length; i++) {
176        size_t valueStartPos = keyValuePairs[i].find('=');
177        if (valueStartPos == notFound)
178            continue;
179
180        String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
181
182        if (key.isEmpty() || key != "filename")
183            continue;
184
185        String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
186
187        // Remove quotes if there are any
188        if (value[0] == '\"')
189            value = value.substring(1, value.length() - 2);
190
191        return value;
192    }
193
194    return String();
195}
196
197String extractMIMETypeFromMediaType(const String& mediaType)
198{
199    Vector<UChar, 64> mimeType;
200    unsigned length = mediaType.length();
201    mimeType.reserveCapacity(length);
202    for (unsigned i = 0; i < length; i++) {
203        UChar c = mediaType[i];
204
205        if (c == ';')
206            break;
207
208        // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
209        // type header field, Content-Type. In such cases, the media type string passed here may contain
210        // the multiple values separated by commas. For now, this code ignores text after the first comma,
211        // which prevents it from simply failing to parse such types altogether. Later for better
212        // compatibility we could consider using the first or last valid MIME type instead.
213        // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
214        if (c == ',')
215            break;
216
217        // FIXME: The following is not correct. RFC 2616 allows linear white space before and
218        // after the MIME type, but not within the MIME type itself. And linear white space
219        // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
220        // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
221        if (isSpaceOrNewline(c))
222            continue;
223
224        mimeType.append(c);
225    }
226
227    if (mimeType.size() == length)
228        return mediaType;
229    return String(mimeType.data(), mimeType.size());
230}
231
232String extractCharsetFromMediaType(const String& mediaType)
233{
234    unsigned int pos, len;
235    findCharsetInMediaType(mediaType, pos, len);
236    return mediaType.substring(pos, len);
237}
238
239void findCharsetInMediaType(const String& mediaType, unsigned int& charsetPos, unsigned int& charsetLen, unsigned int start)
240{
241    charsetPos = start;
242    charsetLen = 0;
243
244    size_t pos = start;
245    unsigned length = mediaType.length();
246
247    while (pos < length) {
248        pos = mediaType.find("charset", pos, false);
249        if (pos == notFound || pos == 0) {
250            charsetLen = 0;
251            return;
252        }
253
254        // is what we found a beginning of a word?
255        if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
256            pos += 7;
257            continue;
258        }
259
260        pos += 7;
261
262        // skip whitespace
263        while (pos != length && mediaType[pos] <= ' ')
264            ++pos;
265
266        if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
267            continue;
268
269        while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
270            ++pos;
271
272        // we don't handle spaces within quoted parameter values, because charset names cannot have any
273        unsigned endpos = pos;
274        while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
275            ++endpos;
276
277        charsetPos = pos;
278        charsetLen = endpos - pos;
279        return;
280    }
281}
282
283XSSProtectionDisposition parseXSSProtectionHeader(const String& header)
284{
285    String stippedHeader = header.stripWhiteSpace();
286
287    if (stippedHeader.isEmpty())
288        return XSSProtectionEnabled;
289
290    if (stippedHeader[0] == '0')
291        return XSSProtectionDisabled;
292
293    unsigned length = header.length();
294    unsigned pos = 0;
295    if (stippedHeader[pos++] == '1'
296        && skipWhiteSpace(stippedHeader, pos, false)
297        && stippedHeader[pos++] == ';'
298        && skipWhiteSpace(stippedHeader, pos, false)
299        && skipToken(stippedHeader, pos, "mode")
300        && skipWhiteSpace(stippedHeader, pos, false)
301        && stippedHeader[pos++] == '='
302        && skipWhiteSpace(stippedHeader, pos, false)
303        && skipToken(stippedHeader, pos, "block")
304        && pos == length)
305        return XSSProtectionBlockEnabled;
306
307    return XSSProtectionEnabled;
308}
309
310String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
311{
312    size_t spacePos = statusLine.find(' ');
313    // Remove status code from the status line.
314    spacePos = statusLine.find(' ', spacePos + 1);
315    return statusLine.substring(spacePos + 1);
316}
317
318bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
319{
320    // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
321    // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
322    // We don't support multiple range requests.
323
324    rangeOffset = rangeEnd = rangeSuffixLength = -1;
325
326    // The "bytes" unit identifier should be present.
327    static const char bytesStart[] = "bytes=";
328    if (!range.startsWith(bytesStart, false))
329        return false;
330    String byteRange = range.substring(sizeof(bytesStart) - 1);
331
332    // The '-' character needs to be present.
333    int index = byteRange.find('-');
334    if (index == -1)
335        return false;
336
337    // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
338    // Example:
339    //     -500
340    if (!index) {
341        String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
342        bool ok;
343        long long value = suffixLengthString.toInt64Strict(&ok);
344        if (ok)
345            rangeSuffixLength = value;
346        return true;
347    }
348
349    // Otherwise, the first-byte-position and the last-byte-position are provied.
350    // Examples:
351    //     0-499
352    //     500-
353    String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
354    bool ok;
355    long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
356    if (!ok)
357        return false;
358
359    String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
360    long long lastBytePos = -1;
361    if (!lastBytePosStr.isEmpty()) {
362        lastBytePos = lastBytePosStr.toInt64Strict(&ok);
363        if (!ok)
364            return false;
365    }
366
367    if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
368        return false;
369
370    rangeOffset = firstBytePos;
371    rangeEnd = lastBytePos;
372    return true;
373}
374
375}
376