1/*
2 * Copyright (C) 2005 Apple Computer, Inc.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#import <WebKit/WebNSDataExtras.h>
30#import <WebKit/WebNSDataExtrasPrivate.h>
31
32#import <wtf/Assertions.h>
33
@interface NSString (WebNSDataExtrasInternal)
// Returns the receiver with header-name capitalization applied: the first
// ASCII letter, and every ASCII letter that directly follows a '-', is
// upper-cased; every other ASCII letter is lower-cased
// ("content-type" -> "Content-Type"). Non-ASCII characters are left alone.
- (NSString *)_web_capitalizeRFC822HeaderFieldName;
@end

@implementation NSString (WebNSDataExtrasInternal)

- (NSString *)_web_capitalizeRFC822HeaderFieldName
{
    CFStringRef source = (CFStringRef)self;
    const CFIndex count = CFStringGetLength(source);

    // Scratch copy of the string's characters. It is not allocated until the
    // first character that actually has to change, so a name that is already
    // in canonical form is returned without any allocation at all.
    char *narrowCopy = NULL;
    UniChar *wideCopy = NULL;
    Boolean haveCopy = FALSE;
    Boolean copyIsWide = FALSE;

    // TRUE at the start of the string and immediately after each '-'.
    Boolean wantUpper = TRUE;

    CFIndex pos = 0;
    while (pos < count) {
        const UniChar original = CFStringGetCharacterAtIndex(source, pos);
        UniChar fixed = original;

        if (wantUpper) {
            if (original >= 'a' && original <= 'z')
                fixed = original + 'A' - 'a';
        } else {
            if (original >= 'A' && original <= 'Z')
                fixed = original + 'a' - 'A';
        }

        if (fixed != original) {
            if (!haveCopy) {
                haveCopy = TRUE;
                const CFRange whole = CFRangeMake(0, count);

                // With a NULL output buffer CFStringGetBytes only reports how
                // many characters could be converted; anything short of the
                // full length means the string does not fit in ISO Latin-1.
                copyIsWide = CFStringGetBytes(source, whole, kCFStringEncodingISOLatin1, 0, FALSE, NULL, 0, NULL) != count;
                if (copyIsWide) {
                    wideCopy = CFAllocatorAllocate(NULL, count * sizeof(UniChar), 0);
                    CFStringGetCharacters(source, whole, wideCopy);
                } else {
                    narrowCopy = CFAllocatorAllocate(NULL, count + 1, 0);
                    CFStringGetCString(source, narrowCopy, count + 1, kCFStringEncodingISOLatin1);
                }
            }

            if (copyIsWide)
                wideCopy[pos] = fixed;
            else
                narrowCopy[pos] = fixed;
        }

        // Only ASCII letters are ever rewritten, so testing the untouched
        // character for '-' is the same as testing the rewritten one.
        wantUpper = (original == '-');
        pos++;
    }

    if (!haveCopy) {
        // Nothing needed changing; hand back the receiver itself.
        return [[self retain] autorelease];
    }

    // The "NoCopy" constructors are handed the scratch buffer together with a
    // NULL contents deallocator, so the buffer is not freed here.
    CFStringRef rebuilt = copyIsWide
        ? CFStringCreateWithCharactersNoCopy(NULL, wideCopy, count, NULL)
        : CFStringCreateWithCStringNoCopy(NULL, narrowCopy, kCFStringEncodingISOLatin1, NULL);
    return [(NSString *)CFMakeCollectable(rebuilt) autorelease];
}

@end
109
110@implementation NSData (WebKitExtras)
111
// Sniffs the beginning of the receiver (at most
// WEB_GUESS_MIME_TYPE_PEEK_LENGTH bytes) for the root element of an RSS or
// Atom feed.
//
// Returns @"application/rss+xml" for <rss>, or for <rdf> followed by a
// <channel> element; @"application/atom+xml" for <feed>; nil otherwise,
// including when an element other than <?...> or <!...> precedes the root.
- (NSString *)_webkit_guessedMIMETypeForXML
{
    // Every "<" that gets examined is guaranteed to start a run of at least
    // this many bytes inside the peek window, which covers "<rdf", "<rss",
    // "<feed", "<?" and "<!".
    const NSUInteger guaranteedTagBytes = 7;
    // "<channel" is one byte longer than that guarantee and therefore needs
    // its own bounds check before it is compared.
    const size_t channelTagLength = strlen("<channel");

    const char *bytes = (const char *)[self bytes];
    const NSUInteger dataLength = [self length];
    const char *end = bytes + dataLength;

    // Clamp while still in NSUInteger so that a very large length cannot be
    // truncated into a negative or bogus small value.
    const NSUInteger peekLength = MIN(dataLength, (NSUInteger)WEB_GUESS_MIME_TYPE_PEEK_LENGTH);
    if (peekLength < guaranteedTagBytes)
        return nil;

    const char *p = bytes;
    size_t remaining = peekLength - (guaranteedTagBytes - 1);

    BOOL foundRDF = NO;

    while (remaining > 0) {
        // Look for a "<".
        const char *hit = memchr(p, '<', remaining);
        if (!hit)
            break;

        // We are trying to identify RSS or Atom. RSS has a top-level
        // element of either <rss> or <rdf>. However, there are non-RSS RDF
        // files, so in the case of <rdf> we keep looking for a <channel>
        // element. For Atom, a top-level <feed> element is all we need to
        // see. Only tags starting with <? or <! may precede the root
        // element; anything else means this is not a feed.
        if (foundRDF) {
            // Compare only when all eight bytes of "<channel" lie inside the
            // data; otherwise strncasecmp could read one byte past the end.
            if ((size_t)(end - hit) >= channelTagLength
                && strncasecmp(hit, "<channel", channelTagLength) == 0)
                return @"application/rss+xml";
        } else if (strncasecmp(hit, "<rdf", strlen("<rdf")) == 0) {
            foundRDF = YES;
        } else if (strncasecmp(hit, "<rss", strlen("<rss")) == 0) {
            return @"application/rss+xml";
        } else if (strncasecmp(hit, "<feed", strlen("<feed")) == 0) {
            return @"application/atom+xml";
        } else if (strncasecmp(hit, "<?", strlen("<?")) != 0 && strncasecmp(hit, "<!", strlen("<!")) != 0) {
            return nil;
        }

        // Skip the "<" and continue. hit lies within [p, p + remaining), so
        // this subtraction cannot underflow.
        remaining -= (hit + 1) - p;
        p = hit + 1;
    }

    return nil;
}
161
// Guesses a MIME type from the receiver's contents.
//
// The checks run in this order, and the first match wins:
//   1. RSS / Atom feed (via -_webkit_guessedMIMETypeForXML)
//   2. an HTML tag ("<html>", "<a ", "<script", "<title>") in the peek window
//   3. the literal text "text/html" in the peek window
//   4. a vCard or vCalendar header at the very start
//   5. plain text (every byte printable ASCII, tab, CR or LF)
//   6. the JPEG signature FF D8 FF E0 at the very start
//
// Returns nil when nothing matches. Empty data is reported as @"text/plain"
// because it contains no byte that fails the plain-text test.
- (NSString *)_webkit_guessedMIMEType
{
    static const char vCardHeader[] = "BEGIN:VCARD";
    static const char vCalendarHeader[] = "BEGIN:VCALENDAR";
    static const char jpegMagic[] = "\xFF\xD8\xFF\xE0";

    NSString *MIMEType = [self _webkit_guessedMIMETypeForXML];
    if ([MIMEType length])
        return MIMEType;

    const char *bytes = (const char *)[self bytes];
    // Keep the length as NSUInteger: narrowing it to int would misreport
    // very large data (a length of exactly 2^32 would turn into 0).
    const NSUInteger length = [self length];
    const NSUInteger peekLength = MIN(length, (NSUInteger)WEB_GUESS_MIME_TYPE_PEEK_LENGTH);

    // Look for an HTML tag. Each "<" examined must leave room for the longest
    // tag tested here ("<script" / "<title>", seven bytes).
    const size_t longestTagLength = strlen("<script");
    if (peekLength >= longestTagLength) {
        const char *p = bytes;
        size_t remaining = peekLength - (longestTagLength - 1);
        while (remaining > 0) {
            const char *hit = memchr(p, '<', remaining);
            if (!hit)
                break;

            if (strncasecmp(hit, "<html>", strlen("<html>")) == 0
                || strncasecmp(hit, "<a ", strlen("<a ")) == 0
                || strncasecmp(hit, "<script", strlen("<script")) == 0
                || strncasecmp(hit, "<title>", strlen("<title>")) == 0)
                return @"text/html";

            // Skip the "<" and continue.
            remaining -= (hit + 1) - p;
            p = hit + 1;
        }
    }

    // Test for a broken server which has sent the content type as part of the
    // content. This could be extended to look for other MIME types.
    // A single forward pass visits every 't'/'T' in order, replacing the two
    // memchr scans per hit that rescanned the rest of the window each time.
    const size_t textHTMLLength = strlen("text/html");
    if (peekLength >= textHTMLLength) {
        const NSUInteger lastStart = peekLength - textHTMLLength;
        NSUInteger offset;
        for (offset = 0; offset <= lastStart; offset++) {
            const char c = bytes[offset];
            if ((c == 't' || c == 'T') && strncasecmp(bytes + offset, "text/html", textHTMLLength) == 0)
                return @"text/html";
        }
    }

    if (length >= strlen(vCardHeader) && strncmp(bytes, vCardHeader, strlen(vCardHeader)) == 0)
        return @"text/vcard";
    if (length >= strlen(vCalendarHeader) && strncmp(bytes, vCalendarHeader, strlen(vCalendarHeader)) == 0)
        return @"text/calendar";

    // Test for plain text. Unlike the checks above, this looks at the whole
    // receiver, not just the peek window.
    NSUInteger i;
    for (i = 0; i < length; i++) {
        // Work on the unsigned value so bytes >= 0x80 are rejected the same
        // way whether or not plain char is signed.
        const unsigned char c = (unsigned char)bytes[i];
        const BOOL printable = (c >= 0x20 && c <= 0x7E);
        if (!printable && c != '\t' && c != '\r' && c != '\n')
            break;
    }
    if (i == length) {
        // Didn't encounter any bad characters, looks like plain text.
        return @"text/plain";
    }

    // Looks like this is a binary file.

    // Sniff for the JPEG magic number. memcmp rather than strncmp because
    // this is binary data, not a C string.
    const size_t jpegMagicLength = sizeof(jpegMagic) - 1;
    if (length >= jpegMagicLength && memcmp(bytes, jpegMagic, jpegMagicLength) == 0)
        return @"image/jpeg";

    return nil;
}
266
267@end
268
269@implementation NSData (WebNSDataExtras)
270
// Returns YES when the receiver's bytes equal the NUL-terminated `string`,
// ignoring ASCII case. As with strncasecmp, a NUL byte inside the receiver
// ends the comparison, so data holding "abc\0" still equals "abc".
// `string` must not be NULL.
- (BOOL)_web_isCaseInsensitiveEqualToCString:(const char *)string
{
    ASSERT(string);

    const NSUInteger length = [self length];

    // strncasecmp only looks at the first `length` bytes, so on its own it
    // would report data that is merely a prefix of `string` (including empty
    // data) as equal. Rule that case out first.
    if (strlen(string) > length)
        return NO;

    // Both sides are empty; avoid handing a possibly-NULL buffer to strncasecmp.
    if (length == 0)
        return YES;

    const char *bytes = (const char *)[self bytes];
    return strncasecmp(bytes, string, length) == 0;
}
278
// Finds the first line terminator in the `len` bytes starting at `bytes`.
//
// HTTP defines EOL as a CRLF pair, but some servers send a bare LF and some
// mix the two (e.g. <header>CRLFLF<body>), so CRLF, LF and CR are all
// accepted.
//
// Returns a pointer to the first terminating character, or NULL when there
// is none. A CR in the very last position also yields NULL: its LF may
// arrive with the next read, so the caller should wait for more data.
static const UInt8 *_findEOL(const UInt8 *bytes, CFIndex len)
{
    if (len <= 0)
        return NULL;

    const UInt8 *const end = bytes + len;
    const UInt8 *cursor;
    for (cursor = bytes; cursor != end; cursor++) {
        switch (*cursor) {
        case '\n':
            return cursor;
        case '\r':
            // A CR that is the final byte might be half of a CRLF that spans
            // two reads.
            return (cursor + 1 == end) ? NULL : cursor;
        default:
            break;
        }
    }

    return NULL;
}
308
// Parses the receiver as a sequence of header lines and returns them as a
// dictionary of capitalized field name -> value, both decoded as ISO Latin-1.
//
// Parsing stops at the first blank line or when no further line terminator
// is found. A line beginning with a space or tab is appended to the value of
// the preceding field. Values of a repeated field name are joined with ", ".
// Lines without a colon, and continuation lines that precede any field, are
// skipped.
- (NSMutableDictionary *)_webkit_parseRFC822HeaderFields
{
    NSMutableDictionary *fields = [NSMutableDictionary dictionary];

    const UInt8 *cursor = [self bytes];
    unsigned remaining = [self length];
    NSString *currentKey = nil;

    for (;;) {
        const UInt8 *terminator = _findEOL(cursor, remaining);
        if (!terminator)
            break;

        const UInt8 *lineStart = cursor;
        SInt32 lineLength = terminator - lineStart;

        // Step past the terminator. A CR directly followed by LF counts as a
        // single terminator; reading the byte after a CR is safe because
        // _findEOL never reports a CR that is the last byte.
        cursor = terminator + 1;
        if (*terminator == '\r' && *cursor == '\n')
            cursor++;
        remaining -= (cursor - lineStart);

        // A blank line marks the end of the header.
        if (lineLength == 0)
            break;

        if (*lineStart == ' ' || *lineStart == '\t') {
            // Continuation line. Without a preceding field it is malformed,
            // so just move on to the next line.
            if (!currentKey)
                continue;

            NSString *existing = [fields objectForKey:currentKey];
            NSString *continuation = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, lineStart, lineLength, kCFStringEncodingISOLatin1, FALSE));
            ASSERT(existing);
            ASSERT(continuation);
            NSString *merged = [[NSString alloc] initWithFormat:@"%@%@", existing, continuation];
            [fields setObject:merged forKey:currentKey];
            // Both strings were created here and are owned by this method.
            [continuation release];
            [merged release];
            continue;
        }

        // A new field: the name runs up to the first colon on the line.
        const UInt8 *scan = lineStart;
        while (*scan != ':' && scan != terminator)
            scan++;
        if (scan == terminator) {
            // No colon at all; malformed, skip the line.
            continue;
        }

        NSString *rawName = [(NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, lineStart, scan - lineStart, kCFStringEncodingISOLatin1, FALSE)) autorelease];
        currentKey = [rawName _web_capitalizeRFC822HeaderFieldName];

        // Skip the colon and any spaces or tabs in front of the value.
        scan++;
        while (scan != terminator && (*scan == ' ' || *scan == '\t'))
            scan++;

        NSString *fieldValue;
        if (scan == terminator)
            fieldValue = [[[NSString alloc] initWithString:@""] autorelease];
        else
            fieldValue = [(NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, scan, terminator - scan, kCFStringEncodingISOLatin1, FALSE)) autorelease];

        // A field name seen before has the new value appended to the old one.
        NSString *previous = [fields objectForKey:currentKey];
        if (previous)
            fieldValue = [[[NSString alloc] initWithFormat:@"%@, %@", previous, fieldValue] autorelease];

        [fields setObject:fieldValue forKey:currentKey];
    }

    return fields;
}
394
// Returns YES when the receiver is non-empty and its first byte is a line
// feed. A leading carriage return is not treated as a blank line.
- (BOOL)_web_startsWithBlankLine
{
    if ([self length] == 0)
        return NO;

    const char *firstByte = (const char *)[self bytes];
    return *firstByte == '\n';
}
399
// Returns the offset of the first byte after the first blank line in the
// receiver, or NSNotFound when no blank line is found.
//
// Recognized blank-line forms are "\n\n", "\r\n\n", "\r\n\r\n", and a
// "\r\n" at the very start of the data. Only offsets below length - 4 are
// considered as the start of a blank line, so one that begins in the last
// four bytes is not reported.
- (NSInteger)_web_locationAfterFirstBlankLine
{
    const char *bytes = (const char *)[self bytes];
    const NSUInteger length = [self length];

    // The scan bound below is length - 4. For shorter data that subtraction
    // would wrap around to a huge unsigned value and the loop would read far
    // past the end of the buffer, so bail out first.
    if (length <= 4)
        return NSNotFound;

    const NSUInteger scanLimit = length - 4;
    NSUInteger i;
    for (i = 0; i < scanLimit; i++) {

        // Support for Acrobat. It sends "\n\n".
        if (bytes[i] == '\n' && bytes[i + 1] == '\n')
            return i + 2;

        // Returns the position after 2 CRLF's or 1 CRLF if it is the first line.
        if (bytes[i] == '\r' && bytes[i + 1] == '\n') {
            // Step over the CRLF. i <= length - 5 on entry, so i + 1 after
            // this adjustment is still inside the buffer.
            i += 2;
            if (i == 2) {
                return i;
            } else if (bytes[i] == '\n') {
                // Support for Director. It sends "\r\n\n" (3880387).
                return i + 1;
            } else if (bytes[i] == '\r' && bytes[i + 1] == '\n') {
                // Support for Flash. It sends "\r\n\r\n" (3758113).
                return i + 2;
            }
        }
    }
    return NSNotFound;
}
429
430@end
431