1/*
2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30#include "config.h"
31
32#include "FrameTestHelpers.h"
33#include "URLTestHelpers.h"
34#include "WebFrame.h"
35#include "WebFrameClient.h"
36#include "WebPageSerializer.h"
37#include "WebPageSerializerClient.h"
38#include "WebScriptSource.h"
39#include "WebSettings.h"
40#include "WebView.h"
41#include "core/dom/Document.h"
42#include "public/platform/Platform.h"
43#include "public/platform/WebString.h"
44#include "public/platform/WebThread.h"
45#include "public/platform/WebURL.h"
46#include "public/platform/WebURLRequest.h"
47#include "public/platform/WebURLResponse.h"
48#include "public/platform/WebUnitTestSupport.h"
49#include "public/web/WebDocument.h"
50#include <gtest/gtest.h>
51
52using namespace blink;
53using WebCore::Document;
54using blink::FrameTestHelpers::runPendingTasks;
55using blink::URLTestHelpers::toKURL;
56using blink::URLTestHelpers::registerMockedURLLoad;
57
58namespace {
59
// Hands out, one at a time, the lines of a text whose lines are separated by
// CRLF ("\r\n"). The text is copied at construction time.
class LineReader {
public:
    // explicit: a string must never be converted into a reader implicitly.
    explicit LineReader(const std::string& text)
        : m_text(text)
        , m_index(0)
    {
    }

    // Stores the next line, without its CRLF terminator, in |line| and returns
    // true. Once the whole text has been consumed |line| is left empty and
    // false is returned. An empty line between two terminators is reported as
    // an empty string; a terminator at the very end of the text does not
    // produce an additional empty line. |line| must not be null.
    bool getNextLine(std::string* line)
    {
        line->clear();
        if (m_index >= m_text.length())
            return false;

        const size_t endOfLineIndex = m_text.find("\r\n", m_index);
        if (endOfLineIndex == std::string::npos) {
            // No terminator left: the remainder of the text is the last line.
            line->assign(m_text, m_index, std::string::npos);
            m_index = m_text.length();
            return true;
        }

        line->assign(m_text, m_index, endOfLineIndex - m_index);
        m_index = endOfLineIndex + 2; // Step over the "\r\n" itself.
        return true;
    }

private:
    std::string m_text; // Private copy of the text being read.
    size_t m_index; // Offset in m_text where the next line starts.
};
84
85class TestWebFrameClient : public WebFrameClient {
86public:
87    virtual ~TestWebFrameClient() { }
88};
89
90class LengthCountingWebPageSerializerClient : public WebPageSerializerClient {
91public:
92    LengthCountingWebPageSerializerClient(size_t* counter)
93        : m_counter(counter)
94    {
95    }
96
97    virtual void didSerializeDataForFrame(const WebURL& frameURL, const WebCString& data, PageSerializationStatus status) {
98        *m_counter += data.length();
99    }
100
101private:
102    size_t* m_counter;
103};
104
105class WebPageNewSerializeTest : public testing::Test {
106public:
107    WebPageNewSerializeTest()
108        : m_htmlMimeType(WebString::fromUTF8("text/html"))
109        , m_xhtmlMimeType(WebString::fromUTF8("application/xhtml+xml"))
110        , m_cssMimeType(WebString::fromUTF8("text/css"))
111        , m_pngMimeType(WebString::fromUTF8("image/png"))
112        , m_svgMimeType(WebString::fromUTF8("image/svg+xml"))
113    {
114    }
115
116protected:
117    virtual void SetUp()
118    {
119        // Create and initialize the WebView.
120        m_webView = WebView::create(0);
121        m_mainFrame = WebFrame::create(&m_webFrameClient);
122
123        // We want the images to load and JavaScript to be on.
124        WebSettings* settings = m_webView->settings();
125        settings->setImagesEnabled(true);
126        settings->setLoadsImagesAutomatically(true);
127        settings->setJavaScriptEnabled(true);
128
129        m_webView->setMainFrame(m_mainFrame);
130    }
131
132    virtual void TearDown()
133    {
134        Platform::current()->unitTestSupport()->unregisterAllMockedURLs();
135        m_webView->close();
136        m_mainFrame->close();
137    }
138
139    WebURL setUpCSSTestPage()
140    {
141        WebURL topFrameURL = toKURL("http://www.test.com");
142        registerMockedURLLoad(topFrameURL, WebString::fromUTF8("css_test_page.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
143        registerMockedURLLoad(toKURL("http://www.test.com/link_styles.css"), WebString::fromUTF8("link_styles.css"), WebString::fromUTF8("pageserializer/"), cssMimeType());
144        registerMockedURLLoad(toKURL("http://www.test.com/import_style_from_link.css"), WebString::fromUTF8("import_style_from_link.css"), WebString::fromUTF8("pageserializer/"), cssMimeType());
145        registerMockedURLLoad(toKURL("http://www.test.com/import_styles.css"), WebString::fromUTF8("import_styles.css"), WebString::fromUTF8("pageserializer/"), cssMimeType());
146        registerMockedURLLoad(toKURL("http://www.test.com/red_background.png"), WebString::fromUTF8("red_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
147        registerMockedURLLoad(toKURL("http://www.test.com/orange_background.png"), WebString::fromUTF8("orange_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
148        registerMockedURLLoad(toKURL("http://www.test.com/yellow_background.png"), WebString::fromUTF8("yellow_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
149        registerMockedURLLoad(toKURL("http://www.test.com/green_background.png"), WebString::fromUTF8("green_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
150        registerMockedURLLoad(toKURL("http://www.test.com/blue_background.png"), WebString::fromUTF8("blue_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
151        registerMockedURLLoad(toKURL("http://www.test.com/purple_background.png"), WebString::fromUTF8("purple_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
152        registerMockedURLLoad(toKURL("http://www.test.com/ul-dot.png"), WebString::fromUTF8("ul-dot.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
153        registerMockedURLLoad(toKURL("http://www.test.com/ol-dot.png"), WebString::fromUTF8("ol-dot.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
154        return topFrameURL;
155    }
156
157    void loadURLInTopFrame(const WebURL& url)
158    {
159        WebURLRequest urlRequest;
160        urlRequest.initialize();
161        urlRequest.setURL(url);
162        m_webView->mainFrame()->loadRequest(urlRequest);
163        // Make sure any pending request get served.
164        Platform::current()->unitTestSupport()->serveAsynchronousMockedRequests();
165        // Some requests get delayed, run the timer.
166        runPendingTasks();
167        // Server the delayed resources.
168        Platform::current()->unitTestSupport()->serveAsynchronousMockedRequests();
169    }
170
171    const WebString& htmlMimeType() const { return m_htmlMimeType; }
172    const WebString& xhtmlMimeType() const { return m_xhtmlMimeType; }
173    const WebString& cssMimeType() const { return m_cssMimeType; }
174    const WebString& pngMimeType() const { return m_pngMimeType; }
175    const WebString& svgMimeType() const { return m_svgMimeType; }
176
177    static bool resourceVectorContains(const WebVector<WebPageSerializer::Resource>& resources, const char* url, const char* mimeType)
178    {
179        WebURL webURL = WebURL(toKURL(url));
180        for (size_t i = 0; i < resources.size(); ++i) {
181            const WebPageSerializer::Resource& resource = resources[i];
182            if (resource.url == webURL && !resource.data.isEmpty() && !resource.mimeType.compare(WebCString(mimeType)))
183                return true;
184        }
185        return false;
186    }
187
188    WebView* m_webView;
189
190private:
191    WebString m_htmlMimeType;
192    WebString m_xhtmlMimeType;
193    WebString m_cssMimeType;
194    WebString m_pngMimeType;
195    WebString m_svgMimeType;
196    TestWebFrameClient m_webFrameClient;
197    WebFrame* m_mainFrame;
198};
199
200// Tests that a page with resources and sub-frame is reported with all its resources.
201TEST_F(WebPageNewSerializeTest, PageWithFrames)
202{
203    // Register the mocked frames.
204    WebURL topFrameURL = toKURL("http://www.test.com");
205    registerMockedURLLoad(topFrameURL, WebString::fromUTF8("top_frame.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
206    registerMockedURLLoad(toKURL("http://www.test.com/iframe.html"), WebString::fromUTF8("iframe.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
207    registerMockedURLLoad(toKURL("http://www.test.com/iframe2.html"), WebString::fromUTF8("iframe2.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
208    registerMockedURLLoad(toKURL("http://www.test.com/red_background.png"), WebString::fromUTF8("red_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
209    registerMockedURLLoad(toKURL("http://www.test.com/green_background.png"), WebString::fromUTF8("green_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
210    registerMockedURLLoad(toKURL("http://www.test.com/blue_background.png"), WebString::fromUTF8("blue_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
211
212    loadURLInTopFrame(topFrameURL);
213    // OBJECT/EMBED have some delay to start to load their content. The first
214    // serveAsynchronousMockedRequests call in loadURLInTopFrame() finishes
215    // before the start.
216    RefPtr<Document> document = static_cast<PassRefPtr<Document> >(m_webView->mainFrame()->document());
217    document->updateLayoutIgnorePendingStylesheets(Document::RunPostLayoutTasksSynchronously);
218    Platform::current()->unitTestSupport()->serveAsynchronousMockedRequests();
219
220    WebVector<WebPageSerializer::Resource> resources;
221    WebPageSerializer::serialize(m_webView, &resources);
222    ASSERT_FALSE(resources.isEmpty());
223
224    // The first resource should be the main-frame.
225    const WebPageSerializer::Resource& resource = resources[0];
226    EXPECT_TRUE(resource.url == WebURL(toKURL("http://www.test.com")));
227    EXPECT_EQ(0, resource.mimeType.compare(WebCString("text/html")));
228    EXPECT_FALSE(resource.data.isEmpty());
229
230    EXPECT_EQ(6U, resources.size()); // There should be no duplicates.
231    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/red_background.png", "image/png"));
232    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/green_background.png", "image/png"));
233    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/blue_background.png", "image/png"));
234    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/iframe.html", "text/html"));
235    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/iframe2.html", "text/html"));
236}
237
238// Test that when serializing a page, all CSS resources are reported, including url()'s
239// and imports and links. Note that we don't test the resources contents, we only make sure
240// they are all reported with the right mime type and that they contain some data.
241TEST_F(WebPageNewSerializeTest, FAILS_CSSResources)
242{
243    // Register the mocked frame and load it.
244    WebURL topFrameURL = setUpCSSTestPage();
245    loadURLInTopFrame(topFrameURL);
246
247    WebVector<WebPageSerializer::Resource> resources;
248    WebPageSerializer::serialize(m_webView, &resources);
249    ASSERT_FALSE(resources.isEmpty());
250
251    // The first resource should be the main-frame.
252    const WebPageSerializer::Resource& resource = resources[0];
253    EXPECT_TRUE(resource.url == WebURL(toKURL("http://www.test.com")));
254    EXPECT_EQ(0, resource.mimeType.compare(WebCString("text/html")));
255    EXPECT_FALSE(resource.data.isEmpty());
256
257    EXPECT_EQ(12U, resources.size()); // There should be no duplicates.
258    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/link_styles.css", "text/css"));
259    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/import_styles.css", "text/css"));
260    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/import_style_from_link.css", "text/css"));
261    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/red_background.png", "image/png"));
262    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/orange_background.png", "image/png"));
263    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/yellow_background.png", "image/png"));
264    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/green_background.png", "image/png"));
265    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/blue_background.png", "image/png"));
266    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/purple_background.png", "image/png"));
267    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/ul-dot.png", "image/png"));
268    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/ol-dot.png", "image/png"));
269}
270
271// Tests that when serializing a page with blank frames these are reported with their resources.
272TEST_F(WebPageNewSerializeTest, BlankFrames)
273{
274    // Register the mocked frame and load it.
275    WebURL topFrameURL = toKURL("http://www.test.com");
276    registerMockedURLLoad(topFrameURL, WebString::fromUTF8("blank_frames.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
277    registerMockedURLLoad(toKURL("http://www.test.com/red_background.png"), WebString::fromUTF8("red_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
278    registerMockedURLLoad(toKURL("http://www.test.com/orange_background.png"), WebString::fromUTF8("orange_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
279    registerMockedURLLoad(toKURL("http://www.test.com/blue_background.png"), WebString::fromUTF8("blue_background.png"), WebString::fromUTF8("pageserializer/"), pngMimeType());
280
281    loadURLInTopFrame(topFrameURL);
282
283    WebVector<WebPageSerializer::Resource> resources;
284    WebPageSerializer::serialize(m_webView, &resources);
285    ASSERT_FALSE(resources.isEmpty());
286
287    // The first resource should be the main-frame.
288    const WebPageSerializer::Resource& resource = resources[0];
289    EXPECT_TRUE(resource.url == WebURL(toKURL("http://www.test.com")));
290    EXPECT_EQ(0, resource.mimeType.compare(WebCString("text/html")));
291    EXPECT_FALSE(resource.data.isEmpty());
292
293    EXPECT_EQ(7U, resources.size()); // There should be no duplicates.
294    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/red_background.png", "image/png"));
295    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/orange_background.png", "image/png"));
296    EXPECT_TRUE(resourceVectorContains(resources, "http://www.test.com/blue_background.png", "image/png"));
297    // The blank frames should have got a magic URL.
298    EXPECT_TRUE(resourceVectorContains(resources, "wyciwyg://frame/0", "text/html"));
299    EXPECT_TRUE(resourceVectorContains(resources, "wyciwyg://frame/1", "text/html"));
300    EXPECT_TRUE(resourceVectorContains(resources, "wyciwyg://frame/2", "text/html"));
301}
302
303TEST_F(WebPageNewSerializeTest, SerializeXMLHasRightDeclaration)
304{
305    WebURL topFrameURL = toKURL("http://www.test.com/simple.xhtml");
306    registerMockedURLLoad(topFrameURL, WebString::fromUTF8("simple.xhtml"), WebString::fromUTF8("pageserializer/"), xhtmlMimeType());
307
308    loadURLInTopFrame(topFrameURL);
309
310    WebVector<WebPageSerializer::Resource> resources;
311    WebPageSerializer::serialize(m_webView, &resources);
312    ASSERT_FALSE(resources.isEmpty());
313
314    // We expect only one resource, the XML.
315    ASSERT_EQ(1U, resources.size());
316    std::string xml = std::string(resources[0].data.data());
317
318    // We should have one and only one instance of the XML declaration.
319    size_t pos = xml.find("<?xml version=");
320    ASSERT_TRUE(pos != std::string::npos);
321
322    pos = xml.find("<?xml version=", pos + 1);
323    ASSERT_TRUE(pos == std::string::npos);
324}
325
326TEST_F(WebPageNewSerializeTest, FAILS_TestMHTMLEncoding)
327{
328    // Load a page with some CSS and some images.
329    WebURL topFrameURL = setUpCSSTestPage();
330    loadURLInTopFrame(topFrameURL);
331
332    WebCString mhtmlData = WebPageSerializer::serializeToMHTML(m_webView);
333    ASSERT_FALSE(mhtmlData.isEmpty());
334
335    // Read the MHTML data line per line and do some pseudo-parsing to make sure the right encoding is used for the different sections.
336    LineReader lineReader(std::string(mhtmlData.data()));
337    int sectionCheckedCount = 0;
338    const char* expectedEncoding = 0;
339    std::string line;
340    while (lineReader.getNextLine(&line)) {
341        if (!line.find("Content-Type:")) {
342            ASSERT_FALSE(expectedEncoding);
343            if (line.find("multipart/related;") != std::string::npos) {
344                // Skip this one, it's part of the MHTML header.
345                continue;
346            }
347            if (line.find("text/") != std::string::npos)
348                expectedEncoding = "quoted-printable";
349            else if (line.find("image/") != std::string::npos)
350                expectedEncoding = "base64";
351            else
352                FAIL() << "Unexpected Content-Type: " << line;
353            continue;
354        }
355        if (!line.find("Content-Transfer-Encoding:")) {
356           ASSERT_TRUE(expectedEncoding);
357           EXPECT_TRUE(line.find(expectedEncoding) != std::string::npos);
358           expectedEncoding = 0;
359           sectionCheckedCount++;
360        }
361    }
362    EXPECT_EQ(12, sectionCheckedCount);
363}
364
365// Test that we don't regress https://bugs.webkit.org/show_bug.cgi?id=99105
366TEST_F(WebPageNewSerializeTest, SVGImageDontCrash)
367{
368    WebURL pageUrl = toKURL("http://www.test.com");
369    WebURL imageUrl = toKURL("http://www.test.com/green_rectangle.svg");
370
371    registerMockedURLLoad(pageUrl, WebString::fromUTF8("page_with_svg_image.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
372    registerMockedURLLoad(imageUrl, WebString::fromUTF8("green_rectangle.svg"), WebString::fromUTF8("pageserializer/"), svgMimeType());
373
374    loadURLInTopFrame(pageUrl);
375
376    WebCString mhtml = WebPageSerializer::serializeToMHTML(m_webView);
377    // We expect some data to be generated.
378    EXPECT_GT(mhtml.length(), 50U);
379}
380
381TEST_F(WebPageNewSerializeTest, NamespaceElementsDontCrash)
382{
383    WebURL pageUrl = toKURL("http://www.test.com");
384    registerMockedURLLoad(pageUrl, WebString::fromUTF8("namespace_element.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
385
386    loadURLInTopFrame(pageUrl);
387
388    WebVector<WebURL> localLinks(static_cast<size_t>(1));
389    WebVector<WebString> localPaths(static_cast<size_t>(1));
390    localLinks[0] = pageUrl;
391    localPaths[0] = WebString("/");
392
393    size_t counter = 0;
394    LengthCountingWebPageSerializerClient client(&counter);
395
396    // We just want to make sure nothing crazy happens, namely that no
397    // assertions are hit. As a sanity check, we also make sure that some data
398    // was returned.
399    WebPageSerializer::serialize(m_webView->mainFrame(), true, &client, localLinks, localPaths, WebString(""));
400
401    EXPECT_GT(counter, 0U);
402}
403
404}
405
406TEST_F(WebPageNewSerializeTest, TestMHTMLEncodingWithDataURL)
407{
408    // Load a page with some data urls.
409    WebURL topFrameURL = toKURL("http://www.test.com");
410    registerMockedURLLoad(topFrameURL, WebString::fromUTF8("page_with_data.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
411    loadURLInTopFrame(topFrameURL);
412
413    WebCString mhtmlData = WebPageSerializer::serializeToMHTML(m_webView);
414    ASSERT_FALSE(mhtmlData.isEmpty());
415
416    // Read the MHTML data line and check that the string data:image is found
417    // exactly one time.
418    size_t nbDataURLs = 0;
419    LineReader lineReader(std::string(mhtmlData.data()));
420    std::string line;
421    while (lineReader.getNextLine(&line)) {
422        if (line.find("data:image") != std::string::npos)
423            nbDataURLs++;
424    }
425    EXPECT_EQ(1u, nbDataURLs);
426}
427
428
429TEST_F(WebPageNewSerializeTest, TestMHTMLEncodingWithMorphingDataURL)
430{
431    // Load a page with some data urls.
432    WebURL topFrameURL = toKURL("http://www.test.com");
433    registerMockedURLLoad(topFrameURL, WebString::fromUTF8("page_with_morphing_data.html"), WebString::fromUTF8("pageserializer/"), htmlMimeType());
434    loadURLInTopFrame(topFrameURL);
435
436    WebCString mhtmlData = WebPageSerializer::serializeToMHTML(m_webView);
437    ASSERT_FALSE(mhtmlData.isEmpty());
438
439    // Read the MHTML data line and check that the string data:image is found
440    // exactly two times.
441    size_t nbDataURLs = 0;
442    LineReader lineReader(std::string(mhtmlData.data()));
443    std::string line;
444    while (lineReader.getNextLine(&line)) {
445        if (line.find("data:text") != std::string::npos)
446            nbDataURLs++;
447    }
448    EXPECT_EQ(2u, nbDataURLs);
449}
450