15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/*
25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2009 Google Inc. All rights reserved.
35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Redistribution and use in source and binary forms, with or without
55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modification, are permitted provided that the following conditions are
65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * met:
75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *     * Redistributions of source code must retain the above copyright
95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * notice, this list of conditions and the following disclaimer.
105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *     * Redistributions in binary form must reproduce the above
115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * copyright notice, this list of conditions and the following disclaimer
125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * in the documentation and/or other materials provided with the
135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * distribution.
145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *     * Neither the name of Google Inc. nor the names of its
155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * contributors may be used to endorse or promote products derived from
165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * this software without specific prior written permission.
175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */
305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// How we handle the base tag better.
325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Current status:
// Currently, the normal way we handle the base tag is:
345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// a) For those links which have corresponding local saved files, such as
355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// savable CSS, JavaScript files, they will be written to relative URLs which
365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// point to local saved file. Why those links can not be resolved as absolute
375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// file URLs, because if they are resolved as absolute URLs, after moving the
385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// file location from one directory to another directory, the file URLs will
395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// be dead links.
405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// b) For those links which have not corresponding local saved files, such as
415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// links in A, AREA tags, they will be resolved as absolute URLs.
// c) We comment all base tags when serializing DOM for the page.
435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// FireFox also uses above way to handle base tag.
445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)//
455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Problem:
465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// This way can not handle the following situation:
475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// the base tag is written by JavaScript.
485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// For example. The page "www.yahoo.com" use
495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// "document.write('<base href="http://www.yahoo.com/"...');" to setup base URL
505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// of page when loading page. So when saving page as completed-HTML, we assume
515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// that we save "www.yahoo.com" to "c:\yahoo.htm". After then we load the saved
525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// completed-HTML page, then the JavaScript will insert a base tag
535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// <base href="http://www.yahoo.com/"...> to DOM, so all URLs which point to
545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// local saved resource files will be resolved as
555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// "http://www.yahoo.com/yahoo_files/...", which will cause all saved  resource
565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// files can not be loaded correctly. Also the page will be rendered ugly since
575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// all saved sub-resource files (such as CSS, JavaScript files) and sub-frame
585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// files can not be fetched.
595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Now FireFox, IE and WebKit based Browser all have this problem.
605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)//
615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Solution:
625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// My solution is that we comment old base tag and write new base tag:
635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// <base href="." ...> after the previous commented base tag. In WebKit, it
645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// always uses the latest "href" attribute of base tag to set document's base
655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// URL. Based on this behavior, when we encounter a base tag, we comment it and
665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// write a new base tag <base href="."> after the previous commented base tag.
675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// The new added base tag can help engine to locate correct base URL for
685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// correctly loading local saved resource files. Also I think we need to inherit
695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// the base target value from document object when appending new base tag.
705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// If there are multiple base tags in original document, we will comment all old
715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// base tags and append new base tag after each old base tag because we do not
725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// know those old base tags are original content or added by JavaScript. If
735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// they are added by JavaScript, it means when loading saved page, the script(s)
745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// will still insert base tag(s) to DOM, so the new added base tag(s) can
// override the incorrect base URL and make sure we always load correct local
765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// saved resource files.
775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "config.h"
79f91f5fa1608c2cdd9af1842fb5dadbe78275be2aBo Liu#include "web/WebPageSerializerImpl.h"
805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
8176c265b59aa821ccbf8c75ab2bb0d036e97d2956Torne (Richard Coles)#include "core/HTMLNames.h"
8253e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/dom/Document.h"
8353e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/dom/DocumentType.h"
8453e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/dom/Element.h"
8553e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/editing/markup.h"
8653e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/HTMLAllCollection.h"
8753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/HTMLElement.h"
8853e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/HTMLFormElement.h"
89e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch#include "core/html/HTMLHtmlElement.h"
9053e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/HTMLMetaElement.h"
9153e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/loader/DocumentLoader.h"
9253e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/loader/FrameLoader.h"
935267f701546148b83dfbe1d151cb184385bb5c22Torne (Richard Coles)#include "public/platform/WebVector.h"
94f91f5fa1608c2cdd9af1842fb5dadbe78275be2aBo Liu#include "web/WebLocalFrameImpl.h"
95521d96ec04ace82590870fb04353ec4f82bb150fTorne (Richard Coles)#include "wtf/text/TextEncoding.h"
965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
9751b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles)namespace blink {
985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
// Maximum length of the data buffer which is used to temporarily hold generated
// html content data. This is a soft limit which might be exceeded if a very
// large contiguous string is found in the page.
1025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static const unsigned dataBufferCapacity = 65536;
1035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)WebPageSerializerImpl::SerializeDomParam::SerializeDomParam(const KURL& url,
10581a5157921f1d2a7ff6aae115bfe3c139b38a5c8Torne (Richard Coles)                                                            const WTF::TextEncoding& textEncoding,
1065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                                            Document* document,
1075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                                            const String& directoryName)
1085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    : url(url)
1095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , textEncoding(textEncoding)
1105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , document(document)
1115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , directoryName(directoryName)
1125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , isHTMLDocument(document->isHTMLDocument())
1135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , haveSeenDocType(false)
1145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , haveAddedCharsetDeclaration(false)
1155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , skipMetaElement(0)
1165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , isInScriptOrStyleTag(false)
1175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , haveAddedXMLProcessingDirective(false)
1185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , haveAddedContentsBeforeEnd(false)
1195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)String WebPageSerializerImpl::preActionBeforeSerializeOpenTag(
1235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const Element* element, SerializeDomParam* param, bool* needSkip)
1245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    StringBuilder result;
1265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    *needSkip = false;
1285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (param->isHTMLDocument) {
1295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Skip the open tag of original META tag which declare charset since we
1305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // have overrided the META which have correct charset declaration after
1315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // serializing open tag of HEAD element.
132d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        ASSERT(element);
133d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        if (isHTMLMetaElement(*element)) {
134d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            const HTMLMetaElement& meta = toHTMLMetaElement(*element);
1355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // Check whether the META tag has declared charset or not.
136d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)            String equiv = meta.httpEquiv();
1375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            if (equalIgnoringCase(equiv, "content-type")) {
138d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)                String content = meta.content();
1395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                if (content.length() && content.contains("charset", false)) {
1405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                    // Find META tag declared charset, we need to skip it when
1415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                    // serializing DOM.
1425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                    param->skipMetaElement = element;
1435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                    *needSkip = true;
1445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                }
1455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            }
146d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        } else if (isHTMLHtmlElement(*element)) {
1475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // Check something before processing the open tag of HEAD element.
1485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // First we add doc type declaration if original document has it.
1495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            if (!param->haveSeenDocType) {
1505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                param->haveSeenDocType = true;
1515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                result.append(createMarkup(param->document->doctype()));
1525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            }
1535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // Add MOTW declaration before html tag.
1555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
1565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            result.append(WebPageSerializer::generateMarkOfTheWebDeclaration(param->url));
157d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        } else if (isHTMLBaseElement(*element)) {
1585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // Comment the BASE tag when serializing dom.
1599e12abdf8c3a23d52091ea54ebb6a04d327f9300Torne (Richard Coles)            result.appendLiteral("<!--");
1605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
1615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    } else {
1625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Write XML declaration.
1635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (!param->haveAddedXMLProcessingDirective) {
1645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            param->haveAddedXMLProcessingDirective = true;
1655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // Get encoding info.
1665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            String xmlEncoding = param->document->xmlEncoding();
1675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            if (xmlEncoding.isEmpty())
168c0e19a689c8ac22cdc96b291a8d33a5d3b0b34a4Torne (Richard Coles)                xmlEncoding = param->document->encodingName();
1695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            if (xmlEncoding.isEmpty())
1705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                xmlEncoding = UTF8Encoding().name();
1719e12abdf8c3a23d52091ea54ebb6a04d327f9300Torne (Richard Coles)            result.appendLiteral("<?xml version=\"");
1725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            result.append(param->document->xmlVersion());
1739e12abdf8c3a23d52091ea54ebb6a04d327f9300Torne (Richard Coles)            result.appendLiteral("\" encoding=\"");
1745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            result.append(xmlEncoding);
1755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            if (param->document->xmlStandalone())
1769e12abdf8c3a23d52091ea54ebb6a04d327f9300Torne (Richard Coles)                result.appendLiteral("\" standalone=\"yes");
1779e12abdf8c3a23d52091ea54ebb6a04d327f9300Torne (Richard Coles)            result.appendLiteral("\"?>\n");
1785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
1795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Add doc type declaration if original document has it.
1805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (!param->haveSeenDocType) {
1815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            param->haveSeenDocType = true;
1825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            result.append(createMarkup(param->document->doctype()));
1835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
1845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return result.toString();
1865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)String WebPageSerializerImpl::postActionAfterSerializeOpenTag(
1895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const Element* element, SerializeDomParam* param)
1905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    StringBuilder result;
1925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    param->haveAddedContentsBeforeEnd = false;
1945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!param->isHTMLDocument)
1955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return result.toString();
1965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Check after processing the open tag of HEAD element
1975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!param->haveAddedCharsetDeclaration
198d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)        && isHTMLHeadElement(*element)) {
1995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        param->haveAddedCharsetDeclaration = true;
2005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Check meta element. WebKit only pre-parse the first 512 bytes
2015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // of the document. If the whole <HEAD> is larger and meta is the
2025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // end of head part, then this kind of pages aren't decoded correctly
2035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // because of this issue. So when we serialize the DOM, we need to
2045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // make sure the meta will in first child of head tag.
2055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // See http://bugs.webkit.org/show_bug.cgi?id=16621.
2065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // First we generate new content for writing correct META element.
2075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result.append(WebPageSerializer::generateMetaCharsetDeclaration(
2085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            String(param->textEncoding.name())));
2095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        param->haveAddedContentsBeforeEnd = true;
2115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Will search each META which has charset declaration, and skip them all
2125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // in PreActionBeforeSerializeOpenTag.
213d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    } else if (isHTMLScriptElement(*element) || isHTMLScriptElement(*element)) {
2145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        param->isInScriptOrStyleTag = true;
2155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
2165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return result.toString();
2185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
2195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)String WebPageSerializerImpl::preActionBeforeSerializeEndTag(
2215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const Element* element, SerializeDomParam* param, bool* needSkip)
2225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
2235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    String result;
2245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    *needSkip = false;
2265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!param->isHTMLDocument)
2275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return result;
2285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Skip the end tag of original META tag which declare charset.
2295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Need not to check whether it's META tag since we guarantee
2305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // skipMetaElement is definitely META tag if it's not 0.
231d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    if (param->skipMetaElement == element) {
2325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        *needSkip = true;
233d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    } else if (isHTMLScriptElement(*element) || isHTMLScriptElement(*element)) {
2345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ASSERT(param->isInScriptOrStyleTag);
2355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        param->isInScriptOrStyleTag = false;
2365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
2375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return result;
2395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
2405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// After we finish serializing end tag of a element, we give the target
2425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// element a chance to do some post work to add some additional data.
2435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)String WebPageSerializerImpl::postActionAfterSerializeEndTag(
2445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const Element* element, SerializeDomParam* param)
2455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
2465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    StringBuilder result;
2475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!param->isHTMLDocument)
2495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return result.toString();
2505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Comment the BASE tag when serializing DOM.
251d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    if (isHTMLBaseElement(*element)) {
2529e12abdf8c3a23d52091ea54ebb6a04d327f9300Torne (Richard Coles)        result.appendLiteral("-->");
2535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Append a new base tag declaration.
2545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result.append(WebPageSerializer::generateBaseTagDeclaration(
2555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            param->document->baseTarget()));
2565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
2575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return result.toString();
2595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
2605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void WebPageSerializerImpl::saveHTMLContentToBuffer(
2625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const String& result, SerializeDomParam* param)
2635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
2645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    m_dataBuffer.append(result);
2655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsNotFinished,
2665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                         param,
2675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                         DoNotForceFlush);
2685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
2695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void WebPageSerializerImpl::encodeAndFlushBuffer(
2715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    WebPageSerializerClient::PageSerializationStatus status,
2725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    SerializeDomParam* param,
2735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    FlushOption flushOption)
2745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
2755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Data buffer is not full nor do we want to force flush.
2765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (flushOption != ForceFlush && m_dataBuffer.length() <= dataBufferCapacity)
2775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
2785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    String content = m_dataBuffer.toString();
2805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    m_dataBuffer.clear();
2815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
28202772c6a72f1ee0b226341a4f4439970c29fc861Ben Murdoch    CString encodedContent = param->textEncoding.normalizeAndEncode(content, WTF::EntitiesForUnencodables);
2835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Send result to the client.
2855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    m_client->didSerializeDataForFrame(param->url,
2865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                       WebCString(encodedContent.data(), encodedContent.length()),
2875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                       status);
2885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
2895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void WebPageSerializerImpl::openTagToString(Element* element,
2915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                            SerializeDomParam* param)
2925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
2935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool needSkip;
2945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    StringBuilder result;
2955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Do pre action for open tag.
2965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    result.append(preActionBeforeSerializeOpenTag(element, param, &needSkip));
2975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (needSkip)
2985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
2995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Add open tag
3005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    result.append('<');
3015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    result.append(element->nodeName().lower());
3025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Go through all attributes and serialize them.
303c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    AttributeCollection attributes = element->attributes();
304e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)    AttributeCollection::iterator end = attributes.end();
305e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)    for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) {
306c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        result.append(' ');
307c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        // Add attribute pair
308c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        result.append(it->name().toString());
309c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        result.appendLiteral("=\"");
310c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        if (!it->value().isEmpty()) {
311c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            const String& attrValue = it->value();
3125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
313c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            // Check whether we need to replace some resource links
314c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            // with local resource paths.
315c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            const QualifiedName& attrName = it->name();
316c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            if (element->hasLegalLinkAttribute(attrName)) {
317c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                // For links start with "javascript:", we do not change it.
318c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                if (attrValue.startsWith("javascript:", false)) {
319c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    result.append(attrValue);
3205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                } else {
321c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    // Get the absolute link
322c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    WebLocalFrameImpl* subFrame = WebLocalFrameImpl::fromFrameOwnerElement(element);
323c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    String completeURL = subFrame ? subFrame->frame()->document()->url() :
324c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                                                    param->document->completeURL(attrValue);
325c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    // Check whether we have local files for those link.
326c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    if (m_localLinks.contains(completeURL)) {
327c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                        if (!param->directoryName.isEmpty()) {
328c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                            result.appendLiteral("./");
329c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                            result.append(param->directoryName);
330c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                            result.append('/');
331c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                        }
332c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                        result.append(m_localLinks.get(completeURL));
333c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    } else {
334c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                        result.append(completeURL);
335c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    }
3365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                }
337c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)            } else {
338c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                if (param->isHTMLDocument)
339c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    result.append(m_htmlEntities.convertEntitiesInString(attrValue));
340c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                else
341c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)                    result.append(m_xmlEntities.convertEntitiesInString(attrValue));
3425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            }
3435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
344c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)        result.append('\"');
3455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
3465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
3475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Do post action for open tag.
3485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    String addedContents = postActionAfterSerializeOpenTag(element, param);
3495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Complete the open tag for element when it has child/children.
350d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    if (element->hasChildren() || param->haveAddedContentsBeforeEnd)
3515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result.append('>');
3525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Append the added contents generate in  post action of open tag.
3535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    result.append(addedContents);
3545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Save the result to data buffer.
3555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    saveHTMLContentToBuffer(result.toString(), param);
3565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
3575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
3585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Serialize end tag of an specified element.
3595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void WebPageSerializerImpl::endTagToString(Element* element,
3605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                           SerializeDomParam* param)
3615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
3625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool needSkip;
3635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    StringBuilder result;
3645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Do pre action for end tag.
3655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    result.append(preActionBeforeSerializeEndTag(element, param, &needSkip));
3665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (needSkip)
3675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
3685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Write end tag when element has child/children.
369d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    if (element->hasChildren() || param->haveAddedContentsBeforeEnd) {
3705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result.appendLiteral("</");
3715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result.append(element->nodeName().lower());
3725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result.append('>');
3735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    } else {
3745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Check whether we have to write end tag for empty element.
3755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (param->isHTMLDocument) {
3765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            result.append('>');
3775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // FIXME: This code is horribly wrong.  WebPageSerializerImpl must die.
37881a5157921f1d2a7ff6aae115bfe3c139b38a5c8Torne (Richard Coles)            if (!element->isHTMLElement() || !toHTMLElement(element)->ieForbidsInsertHTML()) {
3795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                // We need to write end tag when it is required.
3805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                result.appendLiteral("</");
3815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                result.append(element->nodeName().lower());
3825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                result.append('>');
3835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            }
3845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        } else {
3855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            // For xml base document.
3865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            result.appendLiteral(" />");
3875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
3885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
3895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Do post action for end tag.
3905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    result.append(postActionAfterSerializeEndTag(element, param));
3915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Save the result to data buffer.
3925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    saveHTMLContentToBuffer(result.toString(), param);
3935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
3945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
3955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void WebPageSerializerImpl::buildContentForNode(Node* node,
3965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                                SerializeDomParam* param)
3975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
3985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    switch (node->nodeType()) {
3995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    case Node::ELEMENT_NODE:
4005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Process open tag of element.
401926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        openTagToString(toElement(node), param);
4025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Walk through the children nodes and process it.
4035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        for (Node *child = node->firstChild(); child; child = child->nextSibling())
4045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            buildContentForNode(child, param);
4055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Process end tag of element.
406926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        endTagToString(toElement(node), param);
4075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        break;
4085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    case Node::TEXT_NODE:
4095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        saveHTMLContentToBuffer(createMarkup(node), param);
4105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        break;
4115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    case Node::ATTRIBUTE_NODE:
4125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    case Node::DOCUMENT_NODE:
4135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    case Node::DOCUMENT_FRAGMENT_NODE:
4145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Should not exist.
4155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ASSERT_NOT_REACHED();
4165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        break;
4175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Document type node can be in DOM?
4185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    case Node::DOCUMENT_TYPE_NODE:
4195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        param->haveSeenDocType = true;
4205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    default:
4215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // For other type node, call default action.
4225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        saveHTMLContentToBuffer(createMarkup(node), param);
4235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        break;
4245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
4255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)WebPageSerializerImpl::WebPageSerializerImpl(WebFrame* frame,
4285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                             bool recursiveSerialization,
4295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                             WebPageSerializerClient* client,
4305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                             const WebVector<WebURL>& links,
4315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                             const WebVector<WebString>& localPaths,
4325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                                             const WebString& localDirectoryName)
4335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    : m_client(client)
4345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_recursiveSerialization(recursiveSerialization)
4355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_framesCollected(false)
4365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_localDirectoryName(localDirectoryName)
4375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_htmlEntities(false)
4385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_xmlEntities(true)
4395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
4405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Must specify available webframe.
4415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(frame);
44210f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch    m_specifiedWebLocalFrameImpl = toWebLocalFrameImpl(frame);
4435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Make sure we have non 0 client.
4445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(client);
4455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Build local resources map.
4465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(links.size() == localPaths.size());
4475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    for (size_t i = 0; i < links.size(); i++) {
4485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        KURL url = links[i];
4495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ASSERT(!m_localLinks.contains(url.string()));
4505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_localLinks.set(url.string(), localPaths[i]);
4515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
4525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(m_dataBuffer.isEmpty());
4545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void WebPageSerializerImpl::collectTargetFrames()
4575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
4585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(!m_framesCollected);
4595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    m_framesCollected = true;
4605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // First, process main frame.
46210f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch    m_frames.append(m_specifiedWebLocalFrameImpl);
4635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Return now if user only needs to serialize specified frame, not including
4645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // all sub-frames.
4655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!m_recursiveSerialization)
4665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
4675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Collect all frames inside the specified frame.
4685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    for (int i = 0; i < static_cast<int>(m_frames.size()); ++i) {
46910f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch        WebLocalFrameImpl* currentFrame = m_frames[i];
4705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Get current using document.
4715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        Document* currentDoc = currentFrame->frame()->document();
4725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Go through sub-frames.
47376c265b59aa821ccbf8c75ab2bb0d036e97d2956Torne (Richard Coles)        RefPtrWillBeRawPtr<HTMLAllCollection> all = currentDoc->all();
4745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
475d6cdb82654e8f3343a693ca752d5c4cee0324e17Torne (Richard Coles)        for (unsigned i = 0; Element* element = all->item(i); ++i) {
47609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            if (!element->isHTMLElement())
4775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                continue;
47810f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch            WebLocalFrameImpl* webFrame =
47910f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch                WebLocalFrameImpl::fromFrameOwnerElement(element);
4805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            if (webFrame)
4815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                m_frames.append(webFrame);
4825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
4835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
4845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)bool WebPageSerializerImpl::serialize()
4875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
4885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!m_framesCollected)
4895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        collectTargetFrames();
4905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool didSerialization = false;
49210f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch    KURL mainURL = m_specifiedWebLocalFrameImpl->frame()->document()->url();
4935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    for (unsigned i = 0; i < m_frames.size(); ++i) {
49510f88d5669dbd969c059d61ba09fa37dd72ac559Ben Murdoch        WebLocalFrameImpl* webFrame = m_frames[i];
4965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        Document* document = webFrame->frame()->document();
4975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        const KURL& url = document->url();
4985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (!url.isValid() || !m_localLinks.contains(url.string()))
5005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            continue;
5015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        didSerialization = true;
5035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
504c0e19a689c8ac22cdc96b291a8d33a5d3b0b34a4Torne (Richard Coles)        const WTF::TextEncoding& textEncoding = document->encoding().isValid() ? document->encoding() : UTF8Encoding();
5055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        String directoryName = url == mainURL ? m_localDirectoryName : "";
5065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        SerializeDomParam param(url, textEncoding, document, directoryName);
5085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        Element* documentElement = document->documentElement();
5105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (documentElement)
5115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            buildContentForNode(documentElement, &param);
5125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsFinished, &param, ForceFlush);
5145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
5155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(m_dataBuffer.isEmpty());
5175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    m_client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished);
5185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return didSerialization;
5195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
52151b2906e11752df6c18351cf520e30522d3b53a1Torne (Richard Coles)}  // namespace blink
522