// HTMLConstructionSite.cpp revision 2fc2651226baac27029e38c9d6ef883fa32084db
1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "HTMLTreeBuilder.h"
28
29#include "Comment.h"
30#include "DocumentFragment.h"
31#include "DocumentType.h"
32#include "Element.h"
33#include "Frame.h"
34#include "HTMLDocument.h"
35#include "HTMLElementFactory.h"
36#include "HTMLFormElement.h"
37#include "HTMLHtmlElement.h"
38#include "HTMLNames.h"
39#include "HTMLScriptElement.h"
40#include "HTMLToken.h"
41#include "HTMLTokenizer.h"
42#include "LocalizedStrings.h"
43#if ENABLE(MATHML)
44#include "MathMLNames.h"
45#endif
46#include "NotImplemented.h"
47#if ENABLE(SVG)
48#include "SVGNames.h"
49#endif
50#include "ScriptController.h"
51#include "Settings.h"
52#include "Text.h"
53#include <wtf/UnusedParam.h>
54
55namespace WebCore {
56
57using namespace HTMLNames;
58
59namespace {
60
61bool hasImpliedEndTag(Element* element)
62{
63    return element->hasTagName(ddTag)
64        || element->hasTagName(dtTag)
65        || element->hasTagName(liTag)
66        || element->hasTagName(optionTag)
67        || element->hasTagName(optgroupTag)
68        || element->hasTagName(pTag)
69        || element->hasTagName(rpTag)
70        || element->hasTagName(rtTag);
71}
72
73bool causesFosterParenting(const QualifiedName& tagName)
74{
75    return tagName == tableTag
76        || tagName == tbodyTag
77        || tagName == tfootTag
78        || tagName == theadTag
79        || tagName == trTag;
80}
81
82} // namespace
83
84template<typename ChildType>
85PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* parent, PassRefPtr<ChildType> prpChild)
86{
87    RefPtr<ChildType> child = prpChild;
88
89    // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
90    // redirection to the foster parent but HTMLConstructionSite::attachAtSite
91    // doesn't.  It feels like we're missing a concept somehow.
92    if (shouldFosterParent()) {
93        fosterParent(child.get());
94        ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
95        return child.release();
96    }
97
98    parent->parserAddChild(child);
99
100    // An event handler (DOM Mutation, beforeload, et al.) could have removed
101    // the child, in which case we shouldn't try attaching it.
102    if (!child->parentNode())
103        return child.release();
104
105    // It's slightly unfortunate that we need to hold a reference to child
106    // here to call attach().  We should investigate whether we can rely on
107    // |parent| to hold a ref at this point.  In the common case (at least
108    // for elements), however, we'll get to use this ref in the stack of
109    // open elements.
110    if (parent->attached() && !child->attached())
111        child->attach();
112    return child.release();
113}
114
115void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
116{
117    // FIXME: It's unfortunate that we need to hold a reference to child
118    // here to call attach().  We should investigate whether we can rely on
119    // |site.parent| to hold a ref at this point.
120    RefPtr<Node> child = prpChild;
121
122    if (site.nextChild)
123        site.parent->parserInsertBefore(child, site.nextChild);
124    else
125        site.parent->parserAddChild(child);
126
127    // JavaScript run from beforeload (or DOM Mutation or event handlers)
128    // might have removed the child, in which case we should not attach it.
129    if (child->parentNode() && site.parent->attached() && !child->attached())
130        child->attach();
131}
132
// Constructs a site for parsing a whole document: |document| serves both as
// the owning document and as the root that new nodes are attached to.
// Scripting is allowed, fragment parsing is off, and foster-parent
// redirection starts out disabled.
HTMLConstructionSite::HTMLConstructionSite(Document* document)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_fragmentScriptingPermission(FragmentScriptingAllowed)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
{
}
141
// Constructs a site for parsing into |fragment|: the fragment is the root
// that new nodes are attached to, and its document() is the owning document.
// |scriptingPermission| is stored and later passed along when attributes are
// set on created elements.  Foster-parent redirection starts out disabled.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
    : m_document(fragment->document())
    , m_attachmentRoot(fragment)
    , m_fragmentScriptingPermission(scriptingPermission)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
{
}
150
// Nothing to do explicitly here; members clean up after themselves.
HTMLConstructionSite::~HTMLConstructionSite()
{
}
154
// Drops this site's pointers to the document and to the attachment root.
// NOTE(review): presumably the site must not be used to build nodes after
// this call, since both pointers are null -- confirm against callers.
void HTMLConstructionSite::detach()
{
    m_document = 0;
    m_attachmentRoot = 0;
}
160
// Records |form| as the current form element.  No form may already be set
// when this is called (asserted in debug builds).
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
167
// Hands the stored form reference (m_form) over to the caller via release().
PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
{
    return m_form.release();
}
172
173void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
174{
175    ASSERT(m_document);
176    if (m_document->frame() && !m_isParsingFragment)
177        m_document->frame()->loader()->dispatchDocumentElementAvailable();
178}
179
180void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
181{
182    RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
183    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
184    m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
185#if ENABLE(OFFLINE_WEB_APPLICATIONS)
186    element->insertedByParser();
187#endif
188    dispatchDocumentElementAvailableIfNeeded();
189}
190
191void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
192{
193    if (!token.attributes())
194        return;
195
196    NamedNodeMap* attributes = element->attributes(false);
197    for (unsigned i = 0; i < token.attributes()->length(); ++i) {
198        Attribute* attribute = token.attributes()->attributeItem(i);
199        if (!attributes->getAttributeItem(attribute->name()))
200            element->setAttribute(attribute->name(), attribute->value());
201    }
202}
203
204void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
205{
206    // FIXME: parse error
207    mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
208}
209
210void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
211{
212    // FIXME: parse error
213    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
214}
215
216void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
217{
218    ASSERT(token.type() == HTMLToken::DOCTYPE);
219    attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
220
221    // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
222    // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
223    // because context-less fragments can determine their own quirks mode, and thus change
224    // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
225    // in a fragment, as changing the owning document's compatibility mode would be wrong.
226    ASSERT(!m_isParsingFragment);
227    if (m_isParsingFragment)
228        return;
229
230    if (token.forceQuirks())
231        m_document->setCompatibilityMode(Document::QuirksMode);
232    else
233        m_document->setCompatibilityModeFromDoctype();
234}
235
236void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
237{
238    ASSERT(token.type() == HTMLToken::Comment);
239    attach(currentElement(), Comment::create(currentElement()->document(), token.comment()));
240}
241
242void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
243{
244    ASSERT(token.type() == HTMLToken::Comment);
245    attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
246}
247
248void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
249{
250    ASSERT(token.type() == HTMLToken::Comment);
251    Element* parent = m_openElements.htmlElement();
252    attach(parent, Comment::create(parent->document(), token.comment()));
253}
254
255PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
256{
257    return attach(currentElement(), child);
258}
259
260void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
261{
262    ASSERT(!shouldFosterParent());
263    m_head = attachToCurrent(createHTMLElement(token));
264    m_openElements.pushHTMLHeadElement(m_head);
265}
266
267void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
268{
269    ASSERT(!shouldFosterParent());
270    m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
271}
272
273void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
274{
275    RefPtr<Element> element = createHTMLElement(token);
276    ASSERT(element->hasTagName(formTag));
277    RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
278    form->setDemoted(isDemoted);
279    m_openElements.push(attachToCurrent(form.release()));
280    ASSERT(currentElement()->isHTMLElement());
281    ASSERT(currentElement()->hasTagName(formTag));
282    m_form = static_cast<HTMLFormElement*>(currentElement());
283}
284
285void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
286{
287    m_openElements.push(attachToCurrent(createHTMLElement(token)));
288}
289
290void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
291{
292    ASSERT(token.type() == HTMLToken::StartTag);
293    RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
294    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
295    // but self-closing elements are never in the element stack so the stack
296    // doesn't get a chance to tell them that we're done parsing their children.
297    element->finishParsingChildren();
298    // FIXME: Do we want to acknowledge the token's self-closing flag?
299    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
300}
301
302void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
303{
304    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
305    // Possible active formatting elements include:
306    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
307    insertHTMLElement(token);
308    m_activeFormattingElements.append(currentElement());
309}
310
311void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
312{
313    RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentElement()->document(), true);
314    if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
315        element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
316    m_openElements.push(attachToCurrent(element.release()));
317}
318
319void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
320{
321    ASSERT(token.type() == HTMLToken::StartTag);
322    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
323
324    RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
325    if (!token.selfClosing())
326        m_openElements.push(element);
327}
328
329void HTMLConstructionSite::insertTextNode(const String& characters)
330{
331    AttachmentSite site;
332    site.parent = currentElement();
333    site.nextChild = 0;
334    if (shouldFosterParent())
335        findFosterSite(site);
336
337    Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
338    if (previousChild && previousChild->isTextNode()) {
339        // FIXME: We're only supposed to append to this text node if it
340        // was the last text node inserted by the parser.
341        CharacterData* textNode = static_cast<CharacterData*>(previousChild);
342        textNode->parserAppendData(characters);
343        return;
344    }
345
346    attachAtSite(site, Text::create(site.parent->document(), characters));
347}
348
349PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
350{
351    QualifiedName tagName(nullAtom, token.name(), namespaceURI);
352    RefPtr<Element> element = currentElement()->document()->createElement(tagName, true);
353    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
354    return element.release();
355}
356
357PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
358{
359    QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
360    // FIXME: This can't use HTMLConstructionSite::createElement because we
361    // have to pass the current form element.  We should rework form association
362    // to occur after construction to allow better code sharing here.
363    RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentElement()->document(), form(), true);
364    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
365    ASSERT(element->isHTMLElement());
366    return element.release();
367}
368
369PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
370{
371    return createHTMLElementFromSavedElement(record->element());
372}
373
374namespace {
375
376PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
377{
378    NamedNodeMap* attributes = element->attributes(true);
379    if (!attributes)
380        return 0;
381
382    RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
383    for (size_t i = 0; i < attributes->length(); ++i) {
384        Attribute* attribute = attributes->attributeItem(i);
385        RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
386        newAttributes->addAttribute(clone);
387    }
388    return newAttributes.release();
389}
390
391}
392
393PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
394{
395    // FIXME: This method is wrong.  We should be using the original token.
396    // Using an Element* causes us to fail examples like this:
397    // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
398    // When reconstructTheActiveFormattingElements calls this method to open
399    // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
400    // spec implies it should be "1".  Minefield matches the HTML5 spec here.
401
402    ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
403    AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
404    return createHTMLElement(fakeToken);
405}
406
407bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
408{
409    if (m_activeFormattingElements.isEmpty())
410        return false;
411    unsigned index = m_activeFormattingElements.size();
412    do {
413        --index;
414        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
415        if (entry.isMarker() || m_openElements.contains(entry.element())) {
416            firstUnopenElementIndex = index + 1;
417            return firstUnopenElementIndex < m_activeFormattingElements.size();
418        }
419    } while (index);
420    firstUnopenElementIndex = index;
421    return true;
422}
423
424void HTMLConstructionSite::reconstructTheActiveFormattingElements()
425{
426    unsigned firstUnopenElementIndex;
427    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
428        return;
429
430    unsigned unopenEntryIndex = firstUnopenElementIndex;
431    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
432    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
433        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
434        RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
435        m_openElements.push(attachToCurrent(reconstructed.release()));
436        unopenedEntry.replaceElement(currentElement());
437    }
438}
439
440void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
441{
442    while (hasImpliedEndTag(currentElement()) && !currentElement()->hasLocalName(tagName))
443        m_openElements.pop();
444}
445
446void HTMLConstructionSite::generateImpliedEndTags()
447{
448    while (hasImpliedEndTag(currentElement()))
449        m_openElements.pop();
450}
451
452void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
453{
454    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
455    if (lastTableElementRecord) {
456        Element* lastTableElement = lastTableElementRecord->element();
457        if (ContainerNode* parent = lastTableElement->parentNode()) {
458            site.parent = parent;
459            site.nextChild = lastTableElement;
460            return;
461        }
462        site.parent = lastTableElementRecord->next()->element();
463        site.nextChild = 0;
464        return;
465    }
466    // Fragment case
467    site.parent = m_openElements.bottom(); // <html> element
468    site.nextChild = 0;
469}
470
471bool HTMLConstructionSite::shouldFosterParent() const
472{
473    return m_redirectAttachToFosterParent
474        && causesFosterParenting(currentElement()->tagQName());
475}
476
// Inserts |node| at the foster-parent location: findFosterSite() picks the
// parent and the optional next sibling, and attachAtSite() does the actual
// insertion and attachment.
void HTMLConstructionSite::fosterParent(Node* node)
{
    AttachmentSite site;
    // findFosterSite() sets both site.parent and site.nextChild on every path.
    findFosterSite(site);
    attachAtSite(site, node);
}
483
484}
485