HTMLConstructionSite.cpp revision cad810f21b803229eb11403f9209855525a25d57
1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "HTMLTreeBuilder.h"
28
29#include "Comment.h"
30#include "DocumentFragment.h"
31#include "DocumentType.h"
32#include "Element.h"
33#include "Frame.h"
34#include "HTMLDocument.h"
35#include "HTMLElementFactory.h"
36#include "HTMLFormElement.h"
37#include "HTMLHtmlElement.h"
38#include "HTMLNames.h"
39#include "HTMLScriptElement.h"
40#include "HTMLToken.h"
41#include "HTMLTokenizer.h"
42#include "LocalizedStrings.h"
43#if ENABLE(MATHML)
44#include "MathMLNames.h"
45#endif
46#include "NotImplemented.h"
47#if ENABLE(SVG)
48#include "SVGNames.h"
49#endif
50#include "ScriptController.h"
51#include "Settings.h"
52#include "Text.h"
53#include <wtf/UnusedParam.h>
54
55namespace WebCore {
56
57using namespace HTMLNames;
58
59namespace {
60
61bool hasImpliedEndTag(Element* element)
62{
63    return element->hasTagName(ddTag)
64        || element->hasTagName(dtTag)
65        || element->hasTagName(liTag)
66        || element->hasTagName(optionTag)
67        || element->hasTagName(optgroupTag)
68        || element->hasTagName(pTag)
69        || element->hasTagName(rpTag)
70        || element->hasTagName(rtTag);
71}
72
73bool causesFosterParenting(const QualifiedName& tagName)
74{
75    return tagName == tableTag
76        || tagName == tbodyTag
77        || tagName == tfootTag
78        || tagName == theadTag
79        || tagName == trTag;
80}
81
82} // namespace
83
84template<typename ChildType>
85PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRefPtr<ChildType> prpChild)
86{
87    RefPtr<ChildType> child = prpChild;
88
89    // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
90    // redirection to the foster parent but HTMLConstructionSite::attachAtSite
91    // doesn't.  It feels like we're missing a concept somehow.
92    if (shouldFosterParent()) {
93        fosterParent(child.get());
94        ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
95        return child.release();
96    }
97
98    parent->parserAddChild(child);
99
100    // An event handler (DOM Mutation, beforeload, et al.) could have removed
101    // the child, in which case we shouldn't try attaching it.
102    if (!child->parentNode())
103        return child.release();
104
105    // It's slightly unfortunate that we need to hold a reference to child
106    // here to call attach().  We should investigate whether we can rely on
107    // |parent| to hold a ref at this point.  In the common case (at least
108    // for elements), however, we'll get to use this ref in the stack of
109    // open elements.
110    if (parent->attached() && !child->attached())
111        child->attach();
112    return child.release();
113}
114
115void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
116{
117    // FIXME: It's unfortunate that we need to hold a reference to child
118    // here to call attach().  We should investigate whether we can rely on
119    // |site.parent| to hold a ref at this point.
120    RefPtr<Node> child = prpChild;
121
122    if (site.nextChild)
123        site.parent->parserInsertBefore(child, site.nextChild);
124    else
125        site.parent->parserAddChild(child);
126
127    // JavaScript run from beforeload (or DOM Mutation or event handlers)
128    // might have removed the child, in which case we should not attach it.
129    if (child->parentNode() && site.parent->attached() && !child->attached())
130        child->attach();
131}
132
133HTMLConstructionSite::HTMLConstructionSite(Document* document, FragmentScriptingPermission scriptingPermission, bool isParsingFragment)
134    : m_document(document)
135    , m_fragmentScriptingPermission(scriptingPermission)
136    , m_isParsingFragment(isParsingFragment)
137    , m_redirectAttachToFosterParent(false)
138{
139}
140
141HTMLConstructionSite::~HTMLConstructionSite()
142{
143}
144
145void HTMLConstructionSite::detach()
146{
147    m_document = 0;
148}
149
150void HTMLConstructionSite::setForm(HTMLFormElement* form)
151{
152    // This method should only be needed for HTMLTreeBuilder in the fragment case.
153    ASSERT(!m_form);
154    m_form = form;
155}
156
157PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
158{
159    return m_form.release();
160}
161
162void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
163{
164    ASSERT(m_document);
165    if (m_document->frame() && !m_isParsingFragment)
166        m_document->frame()->loader()->dispatchDocumentElementAvailable();
167}
168
169void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
170{
171    RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
172    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
173    m_openElements.pushHTMLHtmlElement(attach<Element>(m_document, element.get()));
174#if ENABLE(OFFLINE_WEB_APPLICATIONS)
175    element->insertedByParser();
176#endif
177    dispatchDocumentElementAvailableIfNeeded();
178}
179
180void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
181{
182    if (!token.attributes())
183        return;
184
185    NamedNodeMap* attributes = element->attributes(false);
186    for (unsigned i = 0; i < token.attributes()->length(); ++i) {
187        Attribute* attribute = token.attributes()->attributeItem(i);
188        if (!attributes->getAttributeItem(attribute->name()))
189            element->setAttribute(attribute->name(), attribute->value());
190    }
191}
192
193void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
194{
195    // FIXME: parse error
196    mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
197}
198
199void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
200{
201    // FIXME: parse error
202    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
203}
204
205void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
206{
207    ASSERT(token.type() == HTMLToken::DOCTYPE);
208    attach(m_document, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
209
210    if (token.forceQuirks())
211        m_document->setCompatibilityMode(Document::QuirksMode);
212    else
213        m_document->setCompatibilityModeFromDoctype();
214}
215
216void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
217{
218    ASSERT(token.type() == HTMLToken::Comment);
219    attach(currentElement(), Comment::create(currentElement()->document(), token.comment()));
220}
221
222void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
223{
224    ASSERT(token.type() == HTMLToken::Comment);
225    attach(m_document, Comment::create(m_document, token.comment()));
226}
227
228void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
229{
230    ASSERT(token.type() == HTMLToken::Comment);
231    Element* parent = m_openElements.htmlElement();
232    attach(parent, Comment::create(parent->document(), token.comment()));
233}
234
235PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
236{
237    return attach(currentElement(), child);
238}
239
240void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
241{
242    ASSERT(!shouldFosterParent());
243    m_head = attachToCurrent(createHTMLElement(token));
244    m_openElements.pushHTMLHeadElement(m_head);
245}
246
247void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
248{
249    ASSERT(!shouldFosterParent());
250    m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
251}
252
253void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
254{
255    RefPtr<Element> element = createHTMLElement(token);
256    ASSERT(element->hasTagName(formTag));
257    RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
258    form->setDemoted(isDemoted);
259    m_openElements.push(attachToCurrent(form.release()));
260    ASSERT(currentElement()->isHTMLElement());
261    ASSERT(currentElement()->hasTagName(formTag));
262    m_form = static_cast<HTMLFormElement*>(currentElement());
263}
264
265void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
266{
267    m_openElements.push(attachToCurrent(createHTMLElement(token)));
268}
269
270void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
271{
272    ASSERT(token.type() == HTMLToken::StartTag);
273    RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
274    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
275    // but self-closing elements are never in the element stack so the stack
276    // doesn't get a chance to tell them that we're done parsing their children.
277    element->finishParsingChildren();
278    // FIXME: Do we want to acknowledge the token's self-closing flag?
279    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
280}
281
282void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
283{
284    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
285    // Possible active formatting elements include:
286    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
287    insertHTMLElement(token);
288    m_activeFormattingElements.append(currentElement());
289}
290
291void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
292{
293    RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentElement()->document(), true);
294    if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
295        element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
296    m_openElements.push(attachToCurrent(element.release()));
297}
298
299void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
300{
301    ASSERT(token.type() == HTMLToken::StartTag);
302    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
303
304    RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
305    if (!token.selfClosing())
306        m_openElements.push(element);
307}
308
309void HTMLConstructionSite::insertTextNode(const String& characters)
310{
311    AttachmentSite site;
312    site.parent = currentElement();
313    site.nextChild = 0;
314    if (shouldFosterParent())
315        findFosterSite(site);
316
317    Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
318    if (previousChild && previousChild->isTextNode()) {
319        // FIXME: We're only supposed to append to this text node if it
320        // was the last text node inserted by the parser.
321        CharacterData* textNode = static_cast<CharacterData*>(previousChild);
322        textNode->parserAppendData(characters);
323        return;
324    }
325
326    attachAtSite(site, Text::create(site.parent->document(), characters));
327}
328
329PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
330{
331    QualifiedName tagName(nullAtom, token.name(), namespaceURI);
332    RefPtr<Element> element = currentElement()->document()->createElement(tagName, true);
333    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
334    return element.release();
335}
336
337PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
338{
339    QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
340    // FIXME: This can't use HTMLConstructionSite::createElement because we
341    // have to pass the current form element.  We should rework form association
342    // to occur after construction to allow better code sharing here.
343    RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentElement()->document(), form(), true);
344    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
345    ASSERT(element->isHTMLElement());
346    return element.release();
347}
348
349PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
350{
351    return createHTMLElementFromSavedElement(record->element());
352}
353
354namespace {
355
356PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
357{
358    NamedNodeMap* attributes = element->attributes(true);
359    if (!attributes)
360        return 0;
361
362    RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
363    for (size_t i = 0; i < attributes->length(); ++i) {
364        Attribute* attribute = attributes->attributeItem(i);
365        RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
366        newAttributes->addAttribute(clone);
367    }
368    return newAttributes.release();
369}
370
371}
372
373PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
374{
375    // FIXME: This method is wrong.  We should be using the original token.
376    // Using an Element* causes us to fail examples like this:
377    // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
378    // When reconstructTheActiveFormattingElements calls this method to open
379    // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
380    // spec implies it should be "1".  Minefield matches the HTML5 spec here.
381
382    ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
383    AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
384    return createHTMLElement(fakeToken);
385}
386
387bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
388{
389    if (m_activeFormattingElements.isEmpty())
390        return false;
391    unsigned index = m_activeFormattingElements.size();
392    do {
393        --index;
394        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
395        if (entry.isMarker() || m_openElements.contains(entry.element())) {
396            firstUnopenElementIndex = index + 1;
397            return firstUnopenElementIndex < m_activeFormattingElements.size();
398        }
399    } while (index);
400    firstUnopenElementIndex = index;
401    return true;
402}
403
404void HTMLConstructionSite::reconstructTheActiveFormattingElements()
405{
406    unsigned firstUnopenElementIndex;
407    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
408        return;
409
410    unsigned unopenEntryIndex = firstUnopenElementIndex;
411    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
412    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
413        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
414        RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
415        m_openElements.push(attachToCurrent(reconstructed.release()));
416        unopenedEntry.replaceElement(currentElement());
417    }
418}
419
420void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
421{
422    while (hasImpliedEndTag(currentElement()) && !currentElement()->hasLocalName(tagName))
423        m_openElements.pop();
424}
425
426void HTMLConstructionSite::generateImpliedEndTags()
427{
428    while (hasImpliedEndTag(currentElement()))
429        m_openElements.pop();
430}
431
432void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
433{
434    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
435    if (lastTableElementRecord) {
436        Element* lastTableElement = lastTableElementRecord->element();
437        if (ContainerNode* parent = lastTableElement->parentNode()) {
438            site.parent = parent;
439            site.nextChild = lastTableElement;
440            return;
441        }
442        site.parent = lastTableElementRecord->next()->element();
443        site.nextChild = 0;
444        return;
445    }
446    // Fragment case
447    site.parent = m_openElements.bottom(); // <html> element
448    site.nextChild = 0;
449}
450
451bool HTMLConstructionSite::shouldFosterParent() const
452{
453    return m_redirectAttachToFosterParent
454        && causesFosterParenting(currentElement()->tagQName());
455}
456
457void HTMLConstructionSite::fosterParent(Node* node)
458{
459    AttachmentSite site;
460    findFosterSite(site);
461    attachAtSite(site, node);
462}
463
464}
465