1d0825bca7fe65beaee391d30da42e937db621564Steve Block/*
2d0825bca7fe65beaee391d30da42e937db621564Steve Block * Copyright (C) 2009 Google Inc. All rights reserved.
3d0825bca7fe65beaee391d30da42e937db621564Steve Block *
4d0825bca7fe65beaee391d30da42e937db621564Steve Block * Redistribution and use in source and binary forms, with or without
5d0825bca7fe65beaee391d30da42e937db621564Steve Block * modification, are permitted provided that the following conditions are
6d0825bca7fe65beaee391d30da42e937db621564Steve Block * met:
7d0825bca7fe65beaee391d30da42e937db621564Steve Block *
8d0825bca7fe65beaee391d30da42e937db621564Steve Block *     * Redistributions of source code must retain the above copyright
9d0825bca7fe65beaee391d30da42e937db621564Steve Block * notice, this list of conditions and the following disclaimer.
10d0825bca7fe65beaee391d30da42e937db621564Steve Block *     * Redistributions in binary form must reproduce the above
11d0825bca7fe65beaee391d30da42e937db621564Steve Block * copyright notice, this list of conditions and the following disclaimer
12d0825bca7fe65beaee391d30da42e937db621564Steve Block * in the documentation and/or other materials provided with the
13d0825bca7fe65beaee391d30da42e937db621564Steve Block * distribution.
14d0825bca7fe65beaee391d30da42e937db621564Steve Block *     * Neither the name of Google Inc. nor the names of its
15d0825bca7fe65beaee391d30da42e937db621564Steve Block * contributors may be used to endorse or promote products derived from
16d0825bca7fe65beaee391d30da42e937db621564Steve Block * this software without specific prior written permission.
17d0825bca7fe65beaee391d30da42e937db621564Steve Block *
18d0825bca7fe65beaee391d30da42e937db621564Steve Block * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19d0825bca7fe65beaee391d30da42e937db621564Steve Block * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20d0825bca7fe65beaee391d30da42e937db621564Steve Block * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21d0825bca7fe65beaee391d30da42e937db621564Steve Block * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22d0825bca7fe65beaee391d30da42e937db621564Steve Block * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23d0825bca7fe65beaee391d30da42e937db621564Steve Block * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24d0825bca7fe65beaee391d30da42e937db621564Steve Block * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25d0825bca7fe65beaee391d30da42e937db621564Steve Block * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26d0825bca7fe65beaee391d30da42e937db621564Steve Block * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27d0825bca7fe65beaee391d30da42e937db621564Steve Block * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28d0825bca7fe65beaee391d30da42e937db621564Steve Block * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29d0825bca7fe65beaee391d30da42e937db621564Steve Block */
30d0825bca7fe65beaee391d30da42e937db621564Steve Block
// How we handle the base tag better.
// Current status:
// At present, the normal way we handle the base tag is:
34d0825bca7fe65beaee391d30da42e937db621564Steve Block// a) For those links which have corresponding local saved files, such as
35d0825bca7fe65beaee391d30da42e937db621564Steve Block// savable CSS, JavaScript files, they will be written to relative URLs which
36d0825bca7fe65beaee391d30da42e937db621564Steve Block// point to local saved file. Why those links can not be resolved as absolute
37d0825bca7fe65beaee391d30da42e937db621564Steve Block// file URLs, because if they are resolved as absolute URLs, after moving the
38d0825bca7fe65beaee391d30da42e937db621564Steve Block// file location from one directory to another directory, the file URLs will
39d0825bca7fe65beaee391d30da42e937db621564Steve Block// be dead links.
40d0825bca7fe65beaee391d30da42e937db621564Steve Block// b) For those links which have not corresponding local saved files, such as
41d0825bca7fe65beaee391d30da42e937db621564Steve Block// links in A, AREA tags, they will be resolved as absolute URLs.
// c) We comment out all base tags when serializing the DOM for the page.
43d0825bca7fe65beaee391d30da42e937db621564Steve Block// FireFox also uses above way to handle base tag.
44d0825bca7fe65beaee391d30da42e937db621564Steve Block//
// Problem:
// This approach cannot handle the following situation:
// the base tag is written by JavaScript.
// For example, the page "www.yahoo.com" uses
// "document.write('<base href="http://www.yahoo.com/"...');" to set up the base
// URL of the page when loading the page. Suppose we save "www.yahoo.com" as
// completed-HTML to "c:\yahoo.htm". When we later load the saved
// completed-HTML page, the JavaScript will insert a base tag
// <base href="http://www.yahoo.com/"...> into the DOM, so all URLs which point
// to local saved resource files will be resolved as
// "http://www.yahoo.com/yahoo_files/...", which prevents all saved resource
// files from being loaded correctly. The page will also be rendered badly
// since none of the saved sub-resource files (such as CSS, JavaScript files)
// and sub-frame files can be fetched.
// Now FireFox, IE and WebKit based Browser all have this problem.
60d0825bca7fe65beaee391d30da42e937db621564Steve Block//
61d0825bca7fe65beaee391d30da42e937db621564Steve Block// Solution:
62d0825bca7fe65beaee391d30da42e937db621564Steve Block// My solution is that we comment old base tag and write new base tag:
63d0825bca7fe65beaee391d30da42e937db621564Steve Block// <base href="." ...> after the previous commented base tag. In WebKit, it
64d0825bca7fe65beaee391d30da42e937db621564Steve Block// always uses the latest "href" attribute of base tag to set document's base
65d0825bca7fe65beaee391d30da42e937db621564Steve Block// URL. Based on this behavior, when we encounter a base tag, we comment it and
66d0825bca7fe65beaee391d30da42e937db621564Steve Block// write a new base tag <base href="."> after the previous commented base tag.
67d0825bca7fe65beaee391d30da42e937db621564Steve Block// The new added base tag can help engine to locate correct base URL for
68d0825bca7fe65beaee391d30da42e937db621564Steve Block// correctly loading local saved resource files. Also I think we need to inherit
69d0825bca7fe65beaee391d30da42e937db621564Steve Block// the base target value from document object when appending new base tag.
70d0825bca7fe65beaee391d30da42e937db621564Steve Block// If there are multiple base tags in original document, we will comment all old
71d0825bca7fe65beaee391d30da42e937db621564Steve Block// base tags and append new base tag after each old base tag because we do not
72d0825bca7fe65beaee391d30da42e937db621564Steve Block// know those old base tags are original content or added by JavaScript. If
73d0825bca7fe65beaee391d30da42e937db621564Steve Block// they are added by JavaScript, it means when loading saved page, the script(s)
74d0825bca7fe65beaee391d30da42e937db621564Steve Block// will still insert base tag(s) to DOM, so the new added base tag(s) can
75d0825bca7fe65beaee391d30da42e937db621564Steve Block// override the incorrect base URL and make sure we alway load correct local
76d0825bca7fe65beaee391d30da42e937db621564Steve Block// saved resource files.
77d0825bca7fe65beaee391d30da42e937db621564Steve Block
78d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "config.h"
79d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "WebPageSerializerImpl.h"
80d0825bca7fe65beaee391d30da42e937db621564Steve Block
81d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "Document.h"
82d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "DocumentType.h"
83d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "Element.h"
84d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "FrameLoader.h"
85d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "HTMLAllCollection.h"
86d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "HTMLElement.h"
87d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "HTMLFormElement.h"
88d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "HTMLMetaElement.h"
89d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "HTMLNames.h"
90d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "KURL.h"
91d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "PlatformString.h"
92d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "StringBuilder.h"
93d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "TextEncoding.h"
94d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "markup.h"
95d0825bca7fe65beaee391d30da42e937db621564Steve Block
96d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "DOMUtilitiesPrivate.h"
97d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "WebFrameImpl.h"
98d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "WebURL.h"
99d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "WebVector.h"
100d0825bca7fe65beaee391d30da42e937db621564Steve Block
101d0825bca7fe65beaee391d30da42e937db621564Steve Blockusing namespace WebCore;
102d0825bca7fe65beaee391d30da42e937db621564Steve Block
103d0825bca7fe65beaee391d30da42e937db621564Steve Blocknamespace WebKit {
104d0825bca7fe65beaee391d30da42e937db621564Steve Block
// Maximum length of the data buffer which is used to temporarily hold the
// generated html content. This is a soft limit: content is appended before the
// length is checked, so it can be exceeded when a very large contiguous string
// is found in the page.
static const unsigned dataBufferCapacity = 64 * 1024;
109d0825bca7fe65beaee391d30da42e937db621564Steve Block
110d0825bca7fe65beaee391d30da42e937db621564Steve BlockWebPageSerializerImpl::SerializeDomParam::SerializeDomParam(const KURL& currentFrameURL,
111d0825bca7fe65beaee391d30da42e937db621564Steve Block                                                            const TextEncoding& textEncoding,
112d0825bca7fe65beaee391d30da42e937db621564Steve Block                                                            Document* doc,
113d0825bca7fe65beaee391d30da42e937db621564Steve Block                                                            const String& directoryName)
114d0825bca7fe65beaee391d30da42e937db621564Steve Block    : currentFrameURL(currentFrameURL)
115d0825bca7fe65beaee391d30da42e937db621564Steve Block    , textEncoding(textEncoding)
116d0825bca7fe65beaee391d30da42e937db621564Steve Block    , doc(doc)
117d0825bca7fe65beaee391d30da42e937db621564Steve Block    , directoryName(directoryName)
118d0825bca7fe65beaee391d30da42e937db621564Steve Block    , hasDoctype(false)
119d0825bca7fe65beaee391d30da42e937db621564Steve Block    , hasCheckedMeta(false)
120d0825bca7fe65beaee391d30da42e937db621564Steve Block    , skipMetaElement(0)
121d0825bca7fe65beaee391d30da42e937db621564Steve Block    , isInScriptOrStyleTag(false)
122d0825bca7fe65beaee391d30da42e937db621564Steve Block    , hasDocDeclaration(false)
123d0825bca7fe65beaee391d30da42e937db621564Steve Block{
124d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Cache the value since we check it lots of times.
125d0825bca7fe65beaee391d30da42e937db621564Steve Block    isHTMLDocument = doc->isHTMLDocument();
126d0825bca7fe65beaee391d30da42e937db621564Steve Block}
127d0825bca7fe65beaee391d30da42e937db621564Steve Block
128d0825bca7fe65beaee391d30da42e937db621564Steve BlockString WebPageSerializerImpl::preActionBeforeSerializeOpenTag(
129d0825bca7fe65beaee391d30da42e937db621564Steve Block    const Element* element, SerializeDomParam* param, bool* needSkip)
130d0825bca7fe65beaee391d30da42e937db621564Steve Block{
131d0825bca7fe65beaee391d30da42e937db621564Steve Block    StringBuilder result;
132d0825bca7fe65beaee391d30da42e937db621564Steve Block
133d0825bca7fe65beaee391d30da42e937db621564Steve Block    *needSkip = false;
134d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (param->isHTMLDocument) {
135d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Skip the open tag of original META tag which declare charset since we
136d0825bca7fe65beaee391d30da42e937db621564Steve Block        // have overrided the META which have correct charset declaration after
137d0825bca7fe65beaee391d30da42e937db621564Steve Block        // serializing open tag of HEAD element.
138d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (element->hasTagName(HTMLNames::metaTag)) {
139d0825bca7fe65beaee391d30da42e937db621564Steve Block            const HTMLMetaElement* meta = static_cast<const HTMLMetaElement*>(element);
140d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Check whether the META tag has declared charset or not.
141d0825bca7fe65beaee391d30da42e937db621564Steve Block            String equiv = meta->httpEquiv();
142d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (equalIgnoringCase(equiv, "content-type")) {
143d0825bca7fe65beaee391d30da42e937db621564Steve Block                String content = meta->content();
144d0825bca7fe65beaee391d30da42e937db621564Steve Block                if (content.length() && content.contains("charset", false)) {
145d0825bca7fe65beaee391d30da42e937db621564Steve Block                    // Find META tag declared charset, we need to skip it when
146d0825bca7fe65beaee391d30da42e937db621564Steve Block                    // serializing DOM.
147d0825bca7fe65beaee391d30da42e937db621564Steve Block                    param->skipMetaElement = element;
148d0825bca7fe65beaee391d30da42e937db621564Steve Block                    *needSkip = true;
149d0825bca7fe65beaee391d30da42e937db621564Steve Block                }
150d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
151d0825bca7fe65beaee391d30da42e937db621564Steve Block        } else if (element->hasTagName(HTMLNames::htmlTag)) {
152d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Check something before processing the open tag of HEAD element.
153d0825bca7fe65beaee391d30da42e937db621564Steve Block            // First we add doc type declaration if original doc has it.
154d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (!param->hasDoctype) {
155d0825bca7fe65beaee391d30da42e937db621564Steve Block                param->hasDoctype = true;
156d0825bca7fe65beaee391d30da42e937db621564Steve Block                result.append(createMarkup(param->doc->doctype()));
157d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
158d0825bca7fe65beaee391d30da42e937db621564Steve Block
159d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Add MOTW declaration before html tag.
160d0825bca7fe65beaee391d30da42e937db621564Steve Block            // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
161d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append(WebPageSerializer::generateMarkOfTheWebDeclaration(param->currentFrameURL));
162d0825bca7fe65beaee391d30da42e937db621564Steve Block        } else if (element->hasTagName(HTMLNames::baseTag)) {
163d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Comment the BASE tag when serializing dom.
164d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append("<!--");
165d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
166d0825bca7fe65beaee391d30da42e937db621564Steve Block    } else {
167d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Write XML declaration.
168d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (!param->hasDocDeclaration) {
169d0825bca7fe65beaee391d30da42e937db621564Steve Block            param->hasDocDeclaration = true;
170d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Get encoding info.
171d0825bca7fe65beaee391d30da42e937db621564Steve Block            String xmlEncoding = param->doc->xmlEncoding();
172d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (xmlEncoding.isEmpty())
173d0825bca7fe65beaee391d30da42e937db621564Steve Block                xmlEncoding = param->doc->frame()->loader()->encoding();
174d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (xmlEncoding.isEmpty())
175d0825bca7fe65beaee391d30da42e937db621564Steve Block                xmlEncoding = UTF8Encoding().name();
176d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append("<?xml version=\"");
177d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append(param->doc->xmlVersion());
178d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append("\" encoding=\"");
179d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append(xmlEncoding);
180d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (param->doc->xmlStandalone())
181d0825bca7fe65beaee391d30da42e937db621564Steve Block                result.append("\" standalone=\"yes");
182d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append("\"?>\n");
183d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
184d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Add doc type declaration if original doc has it.
185d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (!param->hasDoctype) {
186d0825bca7fe65beaee391d30da42e937db621564Steve Block            param->hasDoctype = true;
187d0825bca7fe65beaee391d30da42e937db621564Steve Block            result.append(createMarkup(param->doc->doctype()));
188d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
189d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
190d0825bca7fe65beaee391d30da42e937db621564Steve Block    return result.toString();
191d0825bca7fe65beaee391d30da42e937db621564Steve Block}
192d0825bca7fe65beaee391d30da42e937db621564Steve Block
193d0825bca7fe65beaee391d30da42e937db621564Steve BlockString WebPageSerializerImpl::postActionAfterSerializeOpenTag(
194d0825bca7fe65beaee391d30da42e937db621564Steve Block    const Element* element, SerializeDomParam* param)
195d0825bca7fe65beaee391d30da42e937db621564Steve Block{
196d0825bca7fe65beaee391d30da42e937db621564Steve Block    StringBuilder result;
197d0825bca7fe65beaee391d30da42e937db621564Steve Block
198d0825bca7fe65beaee391d30da42e937db621564Steve Block    param->hasAddedContentsBeforeEnd = false;
199d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (!param->isHTMLDocument)
200d0825bca7fe65beaee391d30da42e937db621564Steve Block        return result.toString();
201d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Check after processing the open tag of HEAD element
202d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (!param->hasCheckedMeta
203d0825bca7fe65beaee391d30da42e937db621564Steve Block        && element->hasTagName(HTMLNames::headTag)) {
204d0825bca7fe65beaee391d30da42e937db621564Steve Block        param->hasCheckedMeta = true;
205d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Check meta element. WebKit only pre-parse the first 512 bytes
206d0825bca7fe65beaee391d30da42e937db621564Steve Block        // of the document. If the whole <HEAD> is larger and meta is the
207d0825bca7fe65beaee391d30da42e937db621564Steve Block        // end of head part, then this kind of pages aren't decoded correctly
208d0825bca7fe65beaee391d30da42e937db621564Steve Block        // because of this issue. So when we serialize the DOM, we need to
209d0825bca7fe65beaee391d30da42e937db621564Steve Block        // make sure the meta will in first child of head tag.
210d0825bca7fe65beaee391d30da42e937db621564Steve Block        // See http://bugs.webkit.org/show_bug.cgi?id=16621.
211d0825bca7fe65beaee391d30da42e937db621564Steve Block        // First we generate new content for writing correct META element.
212d0825bca7fe65beaee391d30da42e937db621564Steve Block        result.append(WebPageSerializer::generateMetaCharsetDeclaration(
213d0825bca7fe65beaee391d30da42e937db621564Steve Block            String(param->textEncoding.name())));
214d0825bca7fe65beaee391d30da42e937db621564Steve Block
215d0825bca7fe65beaee391d30da42e937db621564Steve Block        param->hasAddedContentsBeforeEnd = true;
216d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Will search each META which has charset declaration, and skip them all
217d0825bca7fe65beaee391d30da42e937db621564Steve Block        // in PreActionBeforeSerializeOpenTag.
218d0825bca7fe65beaee391d30da42e937db621564Steve Block    } else if (element->hasTagName(HTMLNames::scriptTag)
219d0825bca7fe65beaee391d30da42e937db621564Steve Block               || element->hasTagName(HTMLNames::styleTag)) {
220d0825bca7fe65beaee391d30da42e937db621564Steve Block        param->isInScriptOrStyleTag = true;
221d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
222d0825bca7fe65beaee391d30da42e937db621564Steve Block
223d0825bca7fe65beaee391d30da42e937db621564Steve Block    return result.toString();
224d0825bca7fe65beaee391d30da42e937db621564Steve Block}
225d0825bca7fe65beaee391d30da42e937db621564Steve Block
226d0825bca7fe65beaee391d30da42e937db621564Steve BlockString WebPageSerializerImpl::preActionBeforeSerializeEndTag(
227d0825bca7fe65beaee391d30da42e937db621564Steve Block    const Element* element, SerializeDomParam* param, bool* needSkip)
228d0825bca7fe65beaee391d30da42e937db621564Steve Block{
229d0825bca7fe65beaee391d30da42e937db621564Steve Block    String result;
230d0825bca7fe65beaee391d30da42e937db621564Steve Block
231d0825bca7fe65beaee391d30da42e937db621564Steve Block    *needSkip = false;
232d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (!param->isHTMLDocument)
233d0825bca7fe65beaee391d30da42e937db621564Steve Block        return result;
234d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Skip the end tag of original META tag which declare charset.
235d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Need not to check whether it's META tag since we guarantee
236d0825bca7fe65beaee391d30da42e937db621564Steve Block    // skipMetaElement is definitely META tag if it's not 0.
237d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (param->skipMetaElement == element)
238d0825bca7fe65beaee391d30da42e937db621564Steve Block        *needSkip = true;
239d0825bca7fe65beaee391d30da42e937db621564Steve Block    else if (element->hasTagName(HTMLNames::scriptTag)
240d0825bca7fe65beaee391d30da42e937db621564Steve Block             || element->hasTagName(HTMLNames::styleTag)) {
241d0825bca7fe65beaee391d30da42e937db621564Steve Block        ASSERT(param->isInScriptOrStyleTag);
242d0825bca7fe65beaee391d30da42e937db621564Steve Block        param->isInScriptOrStyleTag = false;
243d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
244d0825bca7fe65beaee391d30da42e937db621564Steve Block
245d0825bca7fe65beaee391d30da42e937db621564Steve Block    return result;
246d0825bca7fe65beaee391d30da42e937db621564Steve Block}
247d0825bca7fe65beaee391d30da42e937db621564Steve Block
248d0825bca7fe65beaee391d30da42e937db621564Steve Block// After we finish serializing end tag of a element, we give the target
249d0825bca7fe65beaee391d30da42e937db621564Steve Block// element a chance to do some post work to add some additional data.
250d0825bca7fe65beaee391d30da42e937db621564Steve BlockString WebPageSerializerImpl::postActionAfterSerializeEndTag(
251d0825bca7fe65beaee391d30da42e937db621564Steve Block    const Element* element, SerializeDomParam* param)
252d0825bca7fe65beaee391d30da42e937db621564Steve Block{
253d0825bca7fe65beaee391d30da42e937db621564Steve Block    StringBuilder result;
254d0825bca7fe65beaee391d30da42e937db621564Steve Block
255d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (!param->isHTMLDocument)
256d0825bca7fe65beaee391d30da42e937db621564Steve Block        return result.toString();
257d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Comment the BASE tag when serializing DOM.
258d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (element->hasTagName(HTMLNames::baseTag)) {
259d0825bca7fe65beaee391d30da42e937db621564Steve Block        result.append("-->");
260d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Append a new base tag declaration.
261d0825bca7fe65beaee391d30da42e937db621564Steve Block        result.append(WebPageSerializer::generateBaseTagDeclaration(
262d0825bca7fe65beaee391d30da42e937db621564Steve Block            param->doc->baseTarget()));
263d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
264d0825bca7fe65beaee391d30da42e937db621564Steve Block
265d0825bca7fe65beaee391d30da42e937db621564Steve Block    return result.toString();
266d0825bca7fe65beaee391d30da42e937db621564Steve Block}
267d0825bca7fe65beaee391d30da42e937db621564Steve Block
268d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid WebPageSerializerImpl::saveHTMLContentToBuffer(
269d0825bca7fe65beaee391d30da42e937db621564Steve Block    const String& result, SerializeDomParam* param)
270d0825bca7fe65beaee391d30da42e937db621564Steve Block{
271d0825bca7fe65beaee391d30da42e937db621564Steve Block    m_dataBuffer.append(result);
272d0825bca7fe65beaee391d30da42e937db621564Steve Block    encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsNotFinished,
273d0825bca7fe65beaee391d30da42e937db621564Steve Block                         param,
274d0825bca7fe65beaee391d30da42e937db621564Steve Block                         0);
275d0825bca7fe65beaee391d30da42e937db621564Steve Block}
276d0825bca7fe65beaee391d30da42e937db621564Steve Block
277d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid WebPageSerializerImpl::encodeAndFlushBuffer(
278d0825bca7fe65beaee391d30da42e937db621564Steve Block    WebPageSerializerClient::PageSerializationStatus status,
279d0825bca7fe65beaee391d30da42e937db621564Steve Block    SerializeDomParam* param,
280d0825bca7fe65beaee391d30da42e937db621564Steve Block    bool force)
281d0825bca7fe65beaee391d30da42e937db621564Steve Block{
282d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Data buffer is not full nor do we want to force flush.
283d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (!force && m_dataBuffer.length() <= dataBufferCapacity)
284d0825bca7fe65beaee391d30da42e937db621564Steve Block        return;
285d0825bca7fe65beaee391d30da42e937db621564Steve Block
286d0825bca7fe65beaee391d30da42e937db621564Steve Block    String content = m_dataBuffer.toString();
287d0825bca7fe65beaee391d30da42e937db621564Steve Block    m_dataBuffer.clear();
288d0825bca7fe65beaee391d30da42e937db621564Steve Block
289d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Convert the unicode content to target encoding
290d0825bca7fe65beaee391d30da42e937db621564Steve Block    CString encodedContent = param->textEncoding.encode(
291d0825bca7fe65beaee391d30da42e937db621564Steve Block        content.characters(), content.length(), EntitiesForUnencodables);
292d0825bca7fe65beaee391d30da42e937db621564Steve Block
293d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Send result to the client.
294d0825bca7fe65beaee391d30da42e937db621564Steve Block    m_client->didSerializeDataForFrame(param->currentFrameURL,
295d0825bca7fe65beaee391d30da42e937db621564Steve Block                                       WebCString(encodedContent.data(), encodedContent.length()),
296d0825bca7fe65beaee391d30da42e937db621564Steve Block                                       status);
297d0825bca7fe65beaee391d30da42e937db621564Steve Block}
298d0825bca7fe65beaee391d30da42e937db621564Steve Block
299d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid WebPageSerializerImpl::openTagToString(const Element* element,
300d0825bca7fe65beaee391d30da42e937db621564Steve Block                                            SerializeDomParam* param)
301d0825bca7fe65beaee391d30da42e937db621564Steve Block{
302d0825bca7fe65beaee391d30da42e937db621564Steve Block    // FIXME: use StringBuilder instead of String.
303d0825bca7fe65beaee391d30da42e937db621564Steve Block    bool needSkip;
304d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Do pre action for open tag.
305d0825bca7fe65beaee391d30da42e937db621564Steve Block    String result = preActionBeforeSerializeOpenTag(element, param, &needSkip);
306d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (needSkip)
307d0825bca7fe65beaee391d30da42e937db621564Steve Block        return;
308d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Add open tag
309d0825bca7fe65beaee391d30da42e937db621564Steve Block    result += "<" + element->nodeName();
310d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Go through all attributes and serialize them.
311d0825bca7fe65beaee391d30da42e937db621564Steve Block    const NamedNodeMap *attrMap = element->attributes(true);
312d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (attrMap) {
313d0825bca7fe65beaee391d30da42e937db621564Steve Block        unsigned numAttrs = attrMap->length();
314d0825bca7fe65beaee391d30da42e937db621564Steve Block        for (unsigned i = 0; i < numAttrs; i++) {
315d0825bca7fe65beaee391d30da42e937db621564Steve Block            result += " ";
316d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Add attribute pair
317d0825bca7fe65beaee391d30da42e937db621564Steve Block            const Attribute *attribute = attrMap->attributeItem(i);
318d0825bca7fe65beaee391d30da42e937db621564Steve Block            result += attribute->name().toString();
319d0825bca7fe65beaee391d30da42e937db621564Steve Block            result += "=\"";
320d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (!attribute->value().isEmpty()) {
321d0825bca7fe65beaee391d30da42e937db621564Steve Block                const String& attrValue = attribute->value();
322d0825bca7fe65beaee391d30da42e937db621564Steve Block
323d0825bca7fe65beaee391d30da42e937db621564Steve Block                // Check whether we need to replace some resource links
324d0825bca7fe65beaee391d30da42e937db621564Steve Block                // with local resource paths.
325d0825bca7fe65beaee391d30da42e937db621564Steve Block                const QualifiedName& attrName = attribute->name();
326d0825bca7fe65beaee391d30da42e937db621564Steve Block                if (elementHasLegalLinkAttribute(element, attrName)) {
327d0825bca7fe65beaee391d30da42e937db621564Steve Block                    // For links start with "javascript:", we do not change it.
328d0825bca7fe65beaee391d30da42e937db621564Steve Block                    if (attrValue.startsWith("javascript:", false))
329d0825bca7fe65beaee391d30da42e937db621564Steve Block                        result += attrValue;
330d0825bca7fe65beaee391d30da42e937db621564Steve Block                    else {
331d0825bca7fe65beaee391d30da42e937db621564Steve Block                        // Get the absolute link
332d0825bca7fe65beaee391d30da42e937db621564Steve Block                        String completeURL = param->doc->completeURL(attrValue);
333d0825bca7fe65beaee391d30da42e937db621564Steve Block                        // Check whether we have local files for those link.
334d0825bca7fe65beaee391d30da42e937db621564Steve Block                        if (m_localLinks.contains(completeURL)) {
335d0825bca7fe65beaee391d30da42e937db621564Steve Block                            if (!m_localDirectoryName.isEmpty())
336d0825bca7fe65beaee391d30da42e937db621564Steve Block                                result += "./" + m_localDirectoryName + "/";
337d0825bca7fe65beaee391d30da42e937db621564Steve Block                            result += m_localLinks.get(completeURL);
338d0825bca7fe65beaee391d30da42e937db621564Steve Block                        } else
339d0825bca7fe65beaee391d30da42e937db621564Steve Block                            result += completeURL;
340d0825bca7fe65beaee391d30da42e937db621564Steve Block                    }
341d0825bca7fe65beaee391d30da42e937db621564Steve Block                } else {
342d0825bca7fe65beaee391d30da42e937db621564Steve Block                    if (param->isHTMLDocument)
343d0825bca7fe65beaee391d30da42e937db621564Steve Block                        result += m_htmlEntities.convertEntitiesInString(attrValue);
344d0825bca7fe65beaee391d30da42e937db621564Steve Block                    else
345d0825bca7fe65beaee391d30da42e937db621564Steve Block                        result += m_xmlEntities.convertEntitiesInString(attrValue);
346d0825bca7fe65beaee391d30da42e937db621564Steve Block                }
347d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
348d0825bca7fe65beaee391d30da42e937db621564Steve Block            result += "\"";
349d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
350d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
351d0825bca7fe65beaee391d30da42e937db621564Steve Block
352d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Do post action for open tag.
353d0825bca7fe65beaee391d30da42e937db621564Steve Block    String addedContents = postActionAfterSerializeOpenTag(element, param);
354d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Complete the open tag for element when it has child/children.
355d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (element->hasChildNodes() || param->hasAddedContentsBeforeEnd)
356d0825bca7fe65beaee391d30da42e937db621564Steve Block        result += ">";
357d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Append the added contents generate in  post action of open tag.
358d0825bca7fe65beaee391d30da42e937db621564Steve Block    result += addedContents;
359d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Save the result to data buffer.
360d0825bca7fe65beaee391d30da42e937db621564Steve Block    saveHTMLContentToBuffer(result, param);
361d0825bca7fe65beaee391d30da42e937db621564Steve Block}
362d0825bca7fe65beaee391d30da42e937db621564Steve Block
363d0825bca7fe65beaee391d30da42e937db621564Steve Block// Serialize end tag of an specified element.
364d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid WebPageSerializerImpl::endTagToString(const Element* element,
365d0825bca7fe65beaee391d30da42e937db621564Steve Block                                           SerializeDomParam* param)
366d0825bca7fe65beaee391d30da42e937db621564Steve Block{
367d0825bca7fe65beaee391d30da42e937db621564Steve Block    bool needSkip;
368d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Do pre action for end tag.
369d0825bca7fe65beaee391d30da42e937db621564Steve Block    String result = preActionBeforeSerializeEndTag(element,
370d0825bca7fe65beaee391d30da42e937db621564Steve Block                                                   param,
371d0825bca7fe65beaee391d30da42e937db621564Steve Block                                                   &needSkip);
372d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (needSkip)
373d0825bca7fe65beaee391d30da42e937db621564Steve Block        return;
374d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Write end tag when element has child/children.
375d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (element->hasChildNodes() || param->hasAddedContentsBeforeEnd) {
376d0825bca7fe65beaee391d30da42e937db621564Steve Block        result += "</";
377d0825bca7fe65beaee391d30da42e937db621564Steve Block        result += element->nodeName();
378d0825bca7fe65beaee391d30da42e937db621564Steve Block        result += ">";
379d0825bca7fe65beaee391d30da42e937db621564Steve Block    } else {
380d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Check whether we have to write end tag for empty element.
381d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (param->isHTMLDocument) {
382d0825bca7fe65beaee391d30da42e937db621564Steve Block            result += ">";
383d0825bca7fe65beaee391d30da42e937db621564Steve Block            const HTMLElement* htmlElement =
384d0825bca7fe65beaee391d30da42e937db621564Steve Block            static_cast<const HTMLElement*>(element);
385d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (htmlElement->endTagRequirement() == TagStatusRequired) {
386d0825bca7fe65beaee391d30da42e937db621564Steve Block                // We need to write end tag when it is required.
387d0825bca7fe65beaee391d30da42e937db621564Steve Block                result += "</";
388d0825bca7fe65beaee391d30da42e937db621564Steve Block                result += element->nodeName();
389d0825bca7fe65beaee391d30da42e937db621564Steve Block                result += ">";
390d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
391d0825bca7fe65beaee391d30da42e937db621564Steve Block        } else {
392d0825bca7fe65beaee391d30da42e937db621564Steve Block            // For xml base document.
393d0825bca7fe65beaee391d30da42e937db621564Steve Block            result += " />";
394d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
395d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
396d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Do post action for end tag.
397d0825bca7fe65beaee391d30da42e937db621564Steve Block    result += postActionAfterSerializeEndTag(element, param);
398d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Save the result to data buffer.
399d0825bca7fe65beaee391d30da42e937db621564Steve Block    saveHTMLContentToBuffer(result, param);
400d0825bca7fe65beaee391d30da42e937db621564Steve Block}
401d0825bca7fe65beaee391d30da42e937db621564Steve Block
402d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid WebPageSerializerImpl::buildContentForNode(const Node* node,
403d0825bca7fe65beaee391d30da42e937db621564Steve Block                                                SerializeDomParam* param)
404d0825bca7fe65beaee391d30da42e937db621564Steve Block{
405d0825bca7fe65beaee391d30da42e937db621564Steve Block    switch (node->nodeType()) {
406d0825bca7fe65beaee391d30da42e937db621564Steve Block    case Node::ELEMENT_NODE:
407d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Process open tag of element.
408d0825bca7fe65beaee391d30da42e937db621564Steve Block        openTagToString(static_cast<const Element*>(node), param);
409d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Walk through the children nodes and process it.
410d0825bca7fe65beaee391d30da42e937db621564Steve Block        for (const Node *child = node->firstChild(); child; child = child->nextSibling())
411d0825bca7fe65beaee391d30da42e937db621564Steve Block            buildContentForNode(child, param);
412d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Process end tag of element.
413d0825bca7fe65beaee391d30da42e937db621564Steve Block        endTagToString(static_cast<const Element*>(node), param);
414d0825bca7fe65beaee391d30da42e937db621564Steve Block        break;
415d0825bca7fe65beaee391d30da42e937db621564Steve Block    case Node::TEXT_NODE:
416d0825bca7fe65beaee391d30da42e937db621564Steve Block        saveHTMLContentToBuffer(createMarkup(node), param);
417d0825bca7fe65beaee391d30da42e937db621564Steve Block        break;
418d0825bca7fe65beaee391d30da42e937db621564Steve Block    case Node::ATTRIBUTE_NODE:
419d0825bca7fe65beaee391d30da42e937db621564Steve Block    case Node::DOCUMENT_NODE:
420d0825bca7fe65beaee391d30da42e937db621564Steve Block    case Node::DOCUMENT_FRAGMENT_NODE:
421d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Should not exist.
422d0825bca7fe65beaee391d30da42e937db621564Steve Block        ASSERT_NOT_REACHED();
423d0825bca7fe65beaee391d30da42e937db621564Steve Block        break;
424d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Document type node can be in DOM?
425d0825bca7fe65beaee391d30da42e937db621564Steve Block    case Node::DOCUMENT_TYPE_NODE:
426d0825bca7fe65beaee391d30da42e937db621564Steve Block        param->hasDoctype = true;
427d0825bca7fe65beaee391d30da42e937db621564Steve Block    default:
428d0825bca7fe65beaee391d30da42e937db621564Steve Block        // For other type node, call default action.
429d0825bca7fe65beaee391d30da42e937db621564Steve Block        saveHTMLContentToBuffer(createMarkup(node), param);
430d0825bca7fe65beaee391d30da42e937db621564Steve Block        break;
431d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
432d0825bca7fe65beaee391d30da42e937db621564Steve Block}
433d0825bca7fe65beaee391d30da42e937db621564Steve Block
434d0825bca7fe65beaee391d30da42e937db621564Steve BlockWebPageSerializerImpl::WebPageSerializerImpl(WebFrame* frame,
435d0825bca7fe65beaee391d30da42e937db621564Steve Block                                             bool recursiveSerialization,
436d0825bca7fe65beaee391d30da42e937db621564Steve Block                                             WebPageSerializerClient* client,
437d0825bca7fe65beaee391d30da42e937db621564Steve Block                                             const WebVector<WebURL>& links,
438d0825bca7fe65beaee391d30da42e937db621564Steve Block                                             const WebVector<WebString>& localPaths,
439d0825bca7fe65beaee391d30da42e937db621564Steve Block                                             const WebString& localDirectoryName)
440d0825bca7fe65beaee391d30da42e937db621564Steve Block    : m_client(client)
441d0825bca7fe65beaee391d30da42e937db621564Steve Block    , m_recursiveSerialization(recursiveSerialization)
442d0825bca7fe65beaee391d30da42e937db621564Steve Block    , m_framesCollected(false)
443d0825bca7fe65beaee391d30da42e937db621564Steve Block    , m_localDirectoryName(localDirectoryName)
444d0825bca7fe65beaee391d30da42e937db621564Steve Block    , m_htmlEntities(false)
445d0825bca7fe65beaee391d30da42e937db621564Steve Block    , m_xmlEntities(true)
446d0825bca7fe65beaee391d30da42e937db621564Steve Block{
447d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Must specify available webframe.
448d0825bca7fe65beaee391d30da42e937db621564Steve Block    ASSERT(frame);
449d0825bca7fe65beaee391d30da42e937db621564Steve Block    m_specifiedWebFrameImpl = static_cast<WebFrameImpl*>(frame);
450d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Make sure we have non 0 client.
451d0825bca7fe65beaee391d30da42e937db621564Steve Block    ASSERT(client);
452d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Build local resources map.
453d0825bca7fe65beaee391d30da42e937db621564Steve Block    ASSERT(links.size() == localPaths.size());
454d0825bca7fe65beaee391d30da42e937db621564Steve Block    for (size_t i = 0; i < links.size(); i++) {
455d0825bca7fe65beaee391d30da42e937db621564Steve Block        KURL url = links[i];
456d0825bca7fe65beaee391d30da42e937db621564Steve Block        ASSERT(!m_localLinks.contains(url.string()));
457d0825bca7fe65beaee391d30da42e937db621564Steve Block        m_localLinks.set(url.string(), localPaths[i]);
458d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
459d0825bca7fe65beaee391d30da42e937db621564Steve Block
460d0825bca7fe65beaee391d30da42e937db621564Steve Block    ASSERT(!m_dataBuffer.length());
461d0825bca7fe65beaee391d30da42e937db621564Steve Block}
462d0825bca7fe65beaee391d30da42e937db621564Steve Block
463d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid WebPageSerializerImpl::collectTargetFrames()
464d0825bca7fe65beaee391d30da42e937db621564Steve Block{
465d0825bca7fe65beaee391d30da42e937db621564Steve Block    ASSERT(!m_framesCollected);
466d0825bca7fe65beaee391d30da42e937db621564Steve Block    m_framesCollected = true;
467d0825bca7fe65beaee391d30da42e937db621564Steve Block
468d0825bca7fe65beaee391d30da42e937db621564Steve Block    // First, process main frame.
469d0825bca7fe65beaee391d30da42e937db621564Steve Block    m_frames.append(m_specifiedWebFrameImpl);
470d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Return now if user only needs to serialize specified frame, not including
471d0825bca7fe65beaee391d30da42e937db621564Steve Block    // all sub-frames.
472d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (!m_recursiveSerialization)
473d0825bca7fe65beaee391d30da42e937db621564Steve Block        return;
474d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Collect all frames inside the specified frame.
475d0825bca7fe65beaee391d30da42e937db621564Steve Block    for (int i = 0; i < static_cast<int>(m_frames.size()); ++i) {
476d0825bca7fe65beaee391d30da42e937db621564Steve Block        WebFrameImpl* currentFrame = m_frames[i];
477d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Get current using document.
478d0825bca7fe65beaee391d30da42e937db621564Steve Block        Document* currentDoc = currentFrame->frame()->document();
479d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Go through sub-frames.
480d0825bca7fe65beaee391d30da42e937db621564Steve Block        RefPtr<HTMLAllCollection> all = currentDoc->all();
481d0825bca7fe65beaee391d30da42e937db621564Steve Block        for (Node* node = all->firstItem(); node; node = all->nextItem()) {
482d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (!node->isHTMLElement())
483d0825bca7fe65beaee391d30da42e937db621564Steve Block                continue;
484d0825bca7fe65beaee391d30da42e937db621564Steve Block            Element* element = static_cast<Element*>(node);
485d0825bca7fe65beaee391d30da42e937db621564Steve Block            WebFrameImpl* webFrame =
486d0825bca7fe65beaee391d30da42e937db621564Steve Block                WebFrameImpl::fromFrameOwnerElement(element);
487d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (webFrame)
488d0825bca7fe65beaee391d30da42e937db621564Steve Block                m_frames.append(webFrame);
489d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
490d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
491d0825bca7fe65beaee391d30da42e937db621564Steve Block}
492d0825bca7fe65beaee391d30da42e937db621564Steve Block
493d0825bca7fe65beaee391d30da42e937db621564Steve Blockbool WebPageSerializerImpl::serialize()
494d0825bca7fe65beaee391d30da42e937db621564Steve Block{
495d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Collect target frames.
496d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (!m_framesCollected)
497d0825bca7fe65beaee391d30da42e937db621564Steve Block        collectTargetFrames();
498d0825bca7fe65beaee391d30da42e937db621564Steve Block    bool didSerialization = false;
499d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Get KURL for main frame.
500d0825bca7fe65beaee391d30da42e937db621564Steve Block    KURL mainPageURL = m_specifiedWebFrameImpl->frame()->loader()->url();
501d0825bca7fe65beaee391d30da42e937db621564Steve Block
502d0825bca7fe65beaee391d30da42e937db621564Steve Block    // Go through all frames for serializing DOM for whole page, include
503d0825bca7fe65beaee391d30da42e937db621564Steve Block    // sub-frames.
504d0825bca7fe65beaee391d30da42e937db621564Steve Block    for (int i = 0; i < static_cast<int>(m_frames.size()); ++i) {
505d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Get current serializing frame.
506d0825bca7fe65beaee391d30da42e937db621564Steve Block        WebFrameImpl* currentFrame = m_frames[i];
507d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Get current using document.
508d0825bca7fe65beaee391d30da42e937db621564Steve Block        Document* currentDoc = currentFrame->frame()->document();
509d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Get current frame's URL.
510d0825bca7fe65beaee391d30da42e937db621564Steve Block        const KURL& currentFrameURL = currentFrame->frame()->loader()->url();
511d0825bca7fe65beaee391d30da42e937db621564Steve Block
512d0825bca7fe65beaee391d30da42e937db621564Steve Block        // Check whether we have done this document.
513d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (m_localLinks.contains(currentFrameURL.string())) {
514d0825bca7fe65beaee391d30da42e937db621564Steve Block            // A new document, we will serialize it.
515d0825bca7fe65beaee391d30da42e937db621564Steve Block            didSerialization = true;
516d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Get target encoding for current document.
517d0825bca7fe65beaee391d30da42e937db621564Steve Block            String encoding = currentFrame->frame()->loader()->encoding();
518d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Create the text encoding object with target encoding.
519d0825bca7fe65beaee391d30da42e937db621564Steve Block            TextEncoding textEncoding(encoding);
520d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Construct serialize parameter for late processing document.
521d0825bca7fe65beaee391d30da42e937db621564Steve Block            SerializeDomParam param(currentFrameURL,
522d0825bca7fe65beaee391d30da42e937db621564Steve Block                                    encoding.length() ? textEncoding : UTF8Encoding(),
523d0825bca7fe65beaee391d30da42e937db621564Steve Block                                    currentDoc,
524d0825bca7fe65beaee391d30da42e937db621564Steve Block                                    currentFrameURL == mainPageURL ? m_localDirectoryName : "");
525d0825bca7fe65beaee391d30da42e937db621564Steve Block
526d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Process current document.
527d0825bca7fe65beaee391d30da42e937db621564Steve Block            Element* rootElement = currentDoc->documentElement();
528d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (rootElement)
529d0825bca7fe65beaee391d30da42e937db621564Steve Block                buildContentForNode(rootElement, &param);
530d0825bca7fe65beaee391d30da42e937db621564Steve Block
531d0825bca7fe65beaee391d30da42e937db621564Steve Block            // Flush the remainder data and finish serializing current frame.
532d0825bca7fe65beaee391d30da42e937db621564Steve Block            encodeAndFlushBuffer(WebPageSerializerClient::CurrentFrameIsFinished,
533d0825bca7fe65beaee391d30da42e937db621564Steve Block                                 &param,
534d0825bca7fe65beaee391d30da42e937db621564Steve Block                                 1);
535d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
536d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
537d0825bca7fe65beaee391d30da42e937db621564Steve Block
538d0825bca7fe65beaee391d30da42e937db621564Steve Block    // We have done call frames, so we send message to embedder to tell it that
539d0825bca7fe65beaee391d30da42e937db621564Steve Block    // frames are finished serializing.
540d0825bca7fe65beaee391d30da42e937db621564Steve Block    ASSERT(!m_dataBuffer.length());
541d0825bca7fe65beaee391d30da42e937db621564Steve Block    m_client->didSerializeDataForFrame(KURL(),
542d0825bca7fe65beaee391d30da42e937db621564Steve Block                                       WebCString("", 0),
543d0825bca7fe65beaee391d30da42e937db621564Steve Block                                       WebPageSerializerClient::AllFramesAreFinished);
544d0825bca7fe65beaee391d30da42e937db621564Steve Block    return didSerialization;
545d0825bca7fe65beaee391d30da42e937db621564Steve Block}
546d0825bca7fe65beaee391d30da42e937db621564Steve Block
547d0825bca7fe65beaee391d30da42e937db621564Steve Block}  // namespace WebKit
548