1/*
2 * Copyright (C) 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "WebArchiveDumpSupport.h"
28
29#include <CoreFoundation/CoreFoundation.h>
30#include <CFNetwork/CFNetwork.h>
31#include <wtf/RetainPtr.h>
32
// Hand-written C-linkage declarations for the CFURLResponse accessors used by
// convertWebResourceResponseToDictionary() below.
// NOTE(review): presumably these are declared here because the included CFNetwork
// headers do not expose them — confirm against the SDK in use.
extern "C" {

// Accessors for the individual fields of a CFURLResponse. The "Get" naming suggests the
// results are not owned by the caller; the code in this file never releases them.
CFURLRef CFURLResponseGetURL(CFURLResponseRef response);
CFStringRef CFURLResponseGetMIMEType(CFURLResponseRef response);
CFStringRef CFURLResponseGetTextEncodingName(CFURLResponseRef response);
SInt64 CFURLResponseGetExpectedContentLength(CFURLResponseRef response);
// The call site below treats a null result as "no HTTP message available".
CFHTTPMessageRef CFURLResponseGetHTTPResponse(CFURLResponseRef response);

// Declared for completeness; not referenced elsewhere in the visible part of this file.
CFTypeID CFURLResponseGetTypeID(void);

}
44
45static void convertMIMEType(CFMutableStringRef mimeType)
46{
47#ifdef BUILDING_ON_LEOPARD
48    // Workaround for <rdar://problem/5539824> on Leopard
49    if (CFStringCompare(mimeType, CFSTR("text/xml"), kCFCompareAnchored | kCFCompareCaseInsensitive) == kCFCompareEqualTo)
50        CFStringReplaceAll(mimeType, CFSTR("application/xml"));
51#endif
52    // Workaround for <rdar://problem/6234318> with Dashcode 2.0
53    if (CFStringCompare(mimeType, CFSTR("application/x-javascript"), kCFCompareAnchored | kCFCompareCaseInsensitive) == kCFCompareEqualTo)
54        CFStringReplaceAll(mimeType, CFSTR("text/javascript"));
55}
56
57static void convertWebResourceDataToString(CFMutableDictionaryRef resource)
58{
59    CFMutableStringRef mimeType = (CFMutableStringRef)CFDictionaryGetValue(resource, CFSTR("WebResourceMIMEType"));
60    CFStringLowercase(mimeType, CFLocaleGetSystem());
61    convertMIMEType(mimeType);
62
63    CFArrayRef supportedMIMETypes = supportedNonImageMIMETypes();
64    if (CFStringHasPrefix(mimeType, CFSTR("text/")) || CFArrayContainsValue(supportedMIMETypes, CFRangeMake(0, CFArrayGetCount(supportedMIMETypes)), mimeType)) {
65        CFStringRef textEncodingName = static_cast<CFStringRef>(CFDictionaryGetValue(resource, CFSTR("WebResourceTextEncodingName")));
66        CFStringEncoding stringEncoding;
67        if (textEncodingName && CFStringGetLength(textEncodingName))
68            stringEncoding = CFStringConvertIANACharSetNameToEncoding(textEncodingName);
69        else
70            stringEncoding = kCFStringEncodingUTF8;
71
72        CFDataRef data = static_cast<CFDataRef>(CFDictionaryGetValue(resource, CFSTR("WebResourceData")));
73        RetainPtr<CFStringRef> dataAsString(AdoptCF, CFStringCreateFromExternalRepresentation(kCFAllocatorDefault, data, stringEncoding));
74        if (dataAsString)
75            CFDictionarySetValue(resource, CFSTR("WebResourceData"), dataAsString.get());
76    }
77}
78
79static void normalizeHTTPResponseHeaderFields(CFMutableDictionaryRef fields)
80{
81    // Normalize headers
82    if (CFDictionaryContainsKey(fields, CFSTR("Date")))
83        CFDictionarySetValue(fields, CFSTR("Date"), CFSTR("Sun, 16 Nov 2008 17:00:00 GMT"));
84    if (CFDictionaryContainsKey(fields, CFSTR("Last-Modified")))
85        CFDictionarySetValue(fields, CFSTR("Last-Modified"), CFSTR("Sun, 16 Nov 2008 16:55:00 GMT"));
86    if (CFDictionaryContainsKey(fields, CFSTR("Etag")))
87        CFDictionarySetValue(fields, CFSTR("Etag"), CFSTR("\"301925-21-45c7d72d3e780\""));
88    if (CFDictionaryContainsKey(fields, CFSTR("Server")))
89        CFDictionarySetValue(fields, CFSTR("Server"), CFSTR("Apache/2.2.9 (Unix) mod_ssl/2.2.9 OpenSSL/0.9.7l PHP/5.2.6"));
90
91    // Remove headers
92    CFDictionaryRemoveValue(fields, CFSTR("Connection"));
93    CFDictionaryRemoveValue(fields, CFSTR("Keep-Alive"));
94}
95
96static void normalizeWebResourceURL(CFMutableStringRef webResourceURL)
97{
98    static CFIndex fileUrlLength = CFStringGetLength(CFSTR("file://"));
99    CFRange layoutTestsWebArchivePathRange = CFStringFind(webResourceURL, CFSTR("/LayoutTests/"), kCFCompareBackwards);
100    if (layoutTestsWebArchivePathRange.location == kCFNotFound)
101        return;
102    CFRange currentWorkingDirectoryRange = CFRangeMake(fileUrlLength, layoutTestsWebArchivePathRange.location - fileUrlLength);
103    CFStringReplace(webResourceURL, currentWorkingDirectoryRange, CFSTR(""));
104}
105
106static void convertWebResourceResponseToDictionary(CFMutableDictionaryRef propertyList)
107{
108    CFDataRef responseData = static_cast<CFDataRef>(CFDictionaryGetValue(propertyList, CFSTR("WebResourceResponse"))); // WebResourceResponseKey in WebResource.m
109    if (CFGetTypeID(responseData) != CFDataGetTypeID())
110        return;
111
112    RetainPtr<CFURLResponseRef> response(AdoptCF, createCFURLResponseFromResponseData(responseData));
113    if (!response)
114        return;
115
116    RetainPtr<CFMutableDictionaryRef> responseDictionary(AdoptCF, CFDictionaryCreateMutable(kCFAllocatorDefault, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks));
117
118    RetainPtr<CFMutableStringRef> urlString(AdoptCF, CFStringCreateMutableCopy(kCFAllocatorDefault, 0, CFURLGetString(CFURLResponseGetURL(response.get()))));
119    normalizeWebResourceURL(urlString.get());
120    CFDictionarySetValue(responseDictionary.get(), CFSTR("URL"), urlString.get());
121
122    RetainPtr<CFMutableStringRef> mimeTypeString(AdoptCF, CFStringCreateMutableCopy(kCFAllocatorDefault, 0, CFURLResponseGetMIMEType(response.get())));
123    convertMIMEType(mimeTypeString.get());
124    CFDictionarySetValue(responseDictionary.get(), CFSTR("MIMEType"), mimeTypeString.get());
125
126    CFStringRef textEncodingName = CFURLResponseGetTextEncodingName(response.get());
127    if (textEncodingName)
128        CFDictionarySetValue(responseDictionary.get(), CFSTR("textEncodingName"), textEncodingName);
129
130    SInt64 expectedContentLength = CFURLResponseGetExpectedContentLength(response.get());
131    RetainPtr<CFNumberRef> expectedContentLengthNumber(AdoptCF, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &expectedContentLength));
132    CFDictionarySetValue(responseDictionary.get(), CFSTR("expectedContentLength"), expectedContentLengthNumber.get());
133
134    if (CFHTTPMessageRef httpMessage = CFURLResponseGetHTTPResponse(response.get())) {
135        RetainPtr<CFDictionaryRef> allHeaders(AdoptCF, CFHTTPMessageCopyAllHeaderFields(httpMessage));
136        RetainPtr<CFMutableDictionaryRef> allHeaderFields(AdoptCF, CFDictionaryCreateMutableCopy(kCFAllocatorDefault, 0, allHeaders.get()));
137        normalizeHTTPResponseHeaderFields(allHeaderFields.get());
138        CFDictionarySetValue(responseDictionary.get(), CFSTR("allHeaderFields"), allHeaderFields.get());
139
140        CFIndex statusCode = CFHTTPMessageGetResponseStatusCode(httpMessage);
141        RetainPtr<CFNumberRef> statusCodeNumber(AdoptCF, CFNumberCreate(kCFAllocatorDefault, kCFNumberCFIndexType, &statusCode));
142        CFDictionarySetValue(responseDictionary.get(), CFSTR("statusCode"), statusCodeNumber.get());
143    }
144
145    CFDictionarySetValue(propertyList, CFSTR("WebResourceResponse"), responseDictionary.get());
146}
147
148static CFComparisonResult compareResourceURLs(const void *val1, const void *val2, void *context)
149{
150    CFStringRef url1 = static_cast<CFStringRef>(CFDictionaryGetValue(static_cast<CFDictionaryRef>(val1), CFSTR("WebResourceURL")));
151    CFStringRef url2 = static_cast<CFStringRef>(CFDictionaryGetValue(static_cast<CFDictionaryRef>(val2), CFSTR("WebResourceURL")));
152
153    return CFStringCompare(url1, url2, kCFCompareAnchored);
154}
155
156CFStringRef createXMLStringFromWebArchiveData(CFDataRef webArchiveData)
157{
158    CFErrorRef error = 0;
159    CFPropertyListFormat format = kCFPropertyListBinaryFormat_v1_0;
160
161#if defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD)
162    CFIndex bytesCount = CFDataGetLength(webArchiveData);
163    RetainPtr<CFReadStreamRef> readStream(AdoptCF, CFReadStreamCreateWithBytesNoCopy(kCFAllocatorDefault, CFDataGetBytePtr(webArchiveData), bytesCount, kCFAllocatorNull));
164    CFReadStreamOpen(readStream.get());
165    RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, (CFMutableDictionaryRef)CFPropertyListCreateFromStream(kCFAllocatorDefault, readStream.get(), bytesCount, kCFPropertyListMutableContainersAndLeaves, &format, 0));
166    CFReadStreamClose(readStream.get());
167#else
168    RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, (CFMutableDictionaryRef)CFPropertyListCreateWithData(kCFAllocatorDefault, webArchiveData, kCFPropertyListMutableContainersAndLeaves, &format, &error));
169#endif
170
171    if (!propertyList) {
172        if (error)
173            return CFErrorCopyDescription(error);
174        return static_cast<CFStringRef>(CFRetain(CFSTR("An unknown error occurred converting data to property list.")));
175    }
176
177    RetainPtr<CFMutableArrayRef> resources(AdoptCF, CFArrayCreateMutable(kCFAllocatorDefault, 0, &kCFTypeArrayCallBacks));
178    CFArrayAppendValue(resources.get(), propertyList.get());
179
180    while (CFArrayGetCount(resources.get())) {
181        RetainPtr<CFMutableDictionaryRef> resourcePropertyList = (CFMutableDictionaryRef)CFArrayGetValueAtIndex(resources.get(), 0);
182        CFArrayRemoveValueAtIndex(resources.get(), 0);
183
184        CFMutableDictionaryRef mainResource = (CFMutableDictionaryRef)CFDictionaryGetValue(resourcePropertyList.get(), CFSTR("WebMainResource"));
185        normalizeWebResourceURL((CFMutableStringRef)CFDictionaryGetValue(mainResource, CFSTR("WebResourceURL")));
186        convertWebResourceDataToString(mainResource);
187
188        // Add subframeArchives to list for processing
189        CFMutableArrayRef subframeArchives = (CFMutableArrayRef)CFDictionaryGetValue(resourcePropertyList.get(), CFSTR("WebSubframeArchives")); // WebSubframeArchivesKey in WebArchive.m
190        if (subframeArchives)
191            CFArrayAppendArray(resources.get(), subframeArchives, CFRangeMake(0, CFArrayGetCount(subframeArchives)));
192
193        CFMutableArrayRef subresources = (CFMutableArrayRef)CFDictionaryGetValue(resourcePropertyList.get(), CFSTR("WebSubresources")); // WebSubresourcesKey in WebArchive.m
194        if (!subresources)
195            continue;
196
197        CFIndex subresourcesCount = CFArrayGetCount(subresources);
198        for (CFIndex i = 0; i < subresourcesCount; ++i) {
199            CFMutableDictionaryRef subresourcePropertyList = (CFMutableDictionaryRef)CFArrayGetValueAtIndex(subresources, i);
200            normalizeWebResourceURL((CFMutableStringRef)CFDictionaryGetValue(subresourcePropertyList, CFSTR("WebResourceURL")));
201            convertWebResourceResponseToDictionary(subresourcePropertyList);
202            convertWebResourceDataToString(subresourcePropertyList);
203        }
204
205        // Sort the subresources so they're always in a predictable order for the dump
206        CFArraySortValues(subresources, CFRangeMake(0, CFArrayGetCount(subresources)), compareResourceURLs, 0);
207    }
208
209    error = 0;
210
211#if defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD)
212    RetainPtr<CFDataRef> xmlData(AdoptCF, CFPropertyListCreateXMLData(kCFAllocatorDefault, propertyList.get()));
213#else
214    RetainPtr<CFDataRef> xmlData(AdoptCF, CFPropertyListCreateData(kCFAllocatorDefault, propertyList.get(), kCFPropertyListXMLFormat_v1_0, 0, &error));
215#endif
216
217    if (!xmlData) {
218        if (error)
219            return CFErrorCopyDescription(error);
220        return static_cast<CFStringRef>(CFRetain(CFSTR("An unknown error occurred converting property list to data.")));
221    }
222
223    RetainPtr<CFStringRef> xmlString(AdoptCF, CFStringCreateFromExternalRepresentation(kCFAllocatorDefault, xmlData.get(), kCFStringEncodingUTF8));
224    RetainPtr<CFMutableStringRef> string(AdoptCF, CFStringCreateMutableCopy(kCFAllocatorDefault, 0, xmlString.get()));
225
226    // Replace "Apple Computer" with "Apple" in the DTD declaration.
227    CFStringFindAndReplace(string.get(), CFSTR("-//Apple Computer//"), CFSTR("-//Apple//"), CFRangeMake(0, CFStringGetLength(string.get())), 0);
228
229    return string.releaseRef();
230}
231