1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "HTMLTreeBuilder.h"
29
30#include "Comment.h"
31#include "DocumentFragment.h"
32#include "DocumentType.h"
33#include "Element.h"
34#include "Frame.h"
35#include "HTMLDocument.h"
36#include "HTMLElementFactory.h"
37#include "HTMLFormElement.h"
38#include "HTMLHtmlElement.h"
39#include "HTMLNames.h"
40#include "HTMLScriptElement.h"
41#include "HTMLToken.h"
42#include "HTMLTokenizer.h"
43#include "LocalizedStrings.h"
44#if ENABLE(MATHML)
45#include "MathMLNames.h"
46#endif
47#include "NotImplemented.h"
48#if ENABLE(SVG)
49#include "SVGNames.h"
50#endif
51#include "ScriptController.h"
52#include "Settings.h"
53#include "Text.h"
54#include <wtf/UnusedParam.h>
55
56namespace WebCore {
57
58using namespace HTMLNames;
59
60namespace {
61
62bool hasImpliedEndTag(ContainerNode* node)
63{
64    return node->hasTagName(ddTag)
65        || node->hasTagName(dtTag)
66        || node->hasTagName(liTag)
67        || node->hasTagName(optionTag)
68        || node->hasTagName(optgroupTag)
69        || node->hasTagName(pTag)
70        || node->hasTagName(rpTag)
71        || node->hasTagName(rtTag);
72}
73
74bool causesFosterParenting(const QualifiedName& tagName)
75{
76    return tagName == tableTag
77        || tagName == tbodyTag
78        || tagName == tfootTag
79        || tagName == theadTag
80        || tagName == trTag;
81}
82
83} // namespace
84
85template<typename ChildType>
86PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* rawParent, PassRefPtr<ChildType> prpChild)
87{
88    RefPtr<ChildType> child = prpChild;
89    RefPtr<ContainerNode> parent = rawParent;
90
91    // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
92    // redirection to the foster parent but HTMLConstructionSite::attachAtSite
93    // doesn't. It feels like we're missing a concept somehow.
94    if (shouldFosterParent()) {
95        fosterParent(child.get());
96        ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
97        return child.release();
98    }
99
100    parent->parserAddChild(child);
101
102    // An event handler (DOM Mutation, beforeload, et al.) could have removed
103    // the child, in which case we shouldn't try attaching it.
104    if (!child->parentNode())
105        return child.release();
106
107    if (parent->attached() && !child->attached())
108        child->attach();
109    return child.release();
110}
111
112void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
113{
114    // FIXME: It's unfortunate that we need to hold a reference to child
115    // here to call attach().  We should investigate whether we can rely on
116    // |site.parent| to hold a ref at this point.
117    RefPtr<Node> child = prpChild;
118
119    if (site.nextChild)
120        site.parent->parserInsertBefore(child, site.nextChild);
121    else
122        site.parent->parserAddChild(child);
123
124    // JavaScript run from beforeload (or DOM Mutation or event handlers)
125    // might have removed the child, in which case we should not attach it.
126    if (child->parentNode() && site.parent->attached() && !child->attached())
127        child->attach();
128}
129
130HTMLConstructionSite::HTMLConstructionSite(Document* document)
131    : m_document(document)
132    , m_attachmentRoot(document)
133    , m_fragmentScriptingPermission(FragmentScriptingAllowed)
134    , m_isParsingFragment(false)
135    , m_redirectAttachToFosterParent(false)
136{
137}
138
139HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
140    : m_document(fragment->document())
141    , m_attachmentRoot(fragment)
142    , m_fragmentScriptingPermission(scriptingPermission)
143    , m_isParsingFragment(true)
144    , m_redirectAttachToFosterParent(false)
145{
146}
147
148HTMLConstructionSite::~HTMLConstructionSite()
149{
150}
151
152void HTMLConstructionSite::detach()
153{
154    m_document = 0;
155    m_attachmentRoot = 0;
156}
157
158void HTMLConstructionSite::setForm(HTMLFormElement* form)
159{
160    // This method should only be needed for HTMLTreeBuilder in the fragment case.
161    ASSERT(!m_form);
162    m_form = form;
163}
164
165PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
166{
167    return m_form.release();
168}
169
170void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
171{
172    ASSERT(m_document);
173    if (m_document->frame() && !m_isParsingFragment)
174        m_document->frame()->loader()->dispatchDocumentElementAvailable();
175}
176
177void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
178{
179    RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
180    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
181    m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
182#if ENABLE(OFFLINE_WEB_APPLICATIONS)
183    element->insertedByParser();
184#endif
185    dispatchDocumentElementAvailableIfNeeded();
186}
187
188void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
189{
190    if (!token.attributes())
191        return;
192
193    NamedNodeMap* attributes = element->attributes(false);
194    for (unsigned i = 0; i < token.attributes()->length(); ++i) {
195        Attribute* attribute = token.attributes()->attributeItem(i);
196        if (!attributes->getAttributeItem(attribute->name()))
197            element->setAttribute(attribute->name(), attribute->value());
198    }
199}
200
201void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
202{
203    // FIXME: parse error
204
205    // Fragments do not have a root HTML element, so any additional HTML elements
206    // encountered during fragment parsing should be ignored.
207    if (m_isParsingFragment)
208        return;
209
210    mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
211}
212
213void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
214{
215    // FIXME: parse error
216    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
217}
218
219void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
220{
221    ASSERT(token.type() == HTMLToken::DOCTYPE);
222    attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
223
224    // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
225    // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
226    // because context-less fragments can determine their own quirks mode, and thus change
227    // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
228    // in a fragment, as changing the owning document's compatibility mode would be wrong.
229    ASSERT(!m_isParsingFragment);
230    if (m_isParsingFragment)
231        return;
232
233    if (token.forceQuirks())
234        m_document->setCompatibilityMode(Document::QuirksMode);
235    else
236        m_document->setCompatibilityModeFromDoctype();
237}
238
239void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
240{
241    ASSERT(token.type() == HTMLToken::Comment);
242    attach(currentNode(), Comment::create(currentNode()->document(), token.comment()));
243}
244
245void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
246{
247    ASSERT(token.type() == HTMLToken::Comment);
248    attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
249}
250
251void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
252{
253    ASSERT(token.type() == HTMLToken::Comment);
254    ContainerNode* parent = m_openElements.rootNode();
255    attach(parent, Comment::create(parent->document(), token.comment()));
256}
257
258PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
259{
260    return attach(currentNode(), child);
261}
262
263void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
264{
265    ASSERT(!shouldFosterParent());
266    m_head = attachToCurrent(createHTMLElement(token));
267    m_openElements.pushHTMLHeadElement(m_head);
268}
269
270void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
271{
272    ASSERT(!shouldFosterParent());
273    m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
274}
275
276void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
277{
278    RefPtr<Element> element = createHTMLElement(token);
279    ASSERT(element->hasTagName(formTag));
280    RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
281    form->setDemoted(isDemoted);
282    m_openElements.push(attachToCurrent(form.release()));
283    ASSERT(currentElement()->isHTMLElement());
284    ASSERT(currentElement()->hasTagName(formTag));
285    m_form = static_cast<HTMLFormElement*>(currentElement());
286}
287
288void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
289{
290    m_openElements.push(attachToCurrent(createHTMLElement(token)));
291}
292
293void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
294{
295    ASSERT(token.type() == HTMLToken::StartTag);
296    RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
297    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
298    // but self-closing elements are never in the element stack so the stack
299    // doesn't get a chance to tell them that we're done parsing their children.
300    element->finishParsingChildren();
301    // FIXME: Do we want to acknowledge the token's self-closing flag?
302    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
303}
304
305void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
306{
307    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
308    // Possible active formatting elements include:
309    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
310    insertHTMLElement(token);
311    m_activeFormattingElements.append(currentElement());
312}
313
314void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
315{
316    RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true);
317    if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
318        element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
319    m_openElements.push(attachToCurrent(element.release()));
320}
321
322void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
323{
324    ASSERT(token.type() == HTMLToken::StartTag);
325    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
326
327    RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
328    if (!token.selfClosing())
329        m_openElements.push(element);
330}
331
332void HTMLConstructionSite::insertTextNode(const String& characters)
333{
334    AttachmentSite site;
335    site.parent = currentNode();
336    site.nextChild = 0;
337    if (shouldFosterParent())
338        findFosterSite(site);
339
340    unsigned currentPosition = 0;
341
342    // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
343    // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
344
345    Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
346    if (previousChild && previousChild->isTextNode()) {
347        // FIXME: We're only supposed to append to this text node if it
348        // was the last text node inserted by the parser.
349        CharacterData* textNode = static_cast<CharacterData*>(previousChild);
350        currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit);
351    }
352
353    while (currentPosition < characters.length()) {
354        RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition);
355        // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
356        if (!textNode->length())
357            textNode = Text::create(site.parent->document(), characters.substring(currentPosition));
358
359        currentPosition += textNode->length();
360        ASSERT(currentPosition <= characters.length());
361        attachAtSite(site, textNode.release());
362    }
363}
364
365PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
366{
367    QualifiedName tagName(nullAtom, token.name(), namespaceURI);
368    RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
369    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
370    return element.release();
371}
372
373PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
374{
375    QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
376    // FIXME: This can't use HTMLConstructionSite::createElement because we
377    // have to pass the current form element.  We should rework form association
378    // to occur after construction to allow better code sharing here.
379    RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
380    element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
381    ASSERT(element->isHTMLElement());
382    return element.release();
383}
384
385PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
386{
387    return createHTMLElementFromSavedElement(record->element());
388}
389
390namespace {
391
392PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
393{
394    NamedNodeMap* attributes = element->attributes(true);
395    if (!attributes)
396        return 0;
397
398    RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
399    for (size_t i = 0; i < attributes->length(); ++i) {
400        Attribute* attribute = attributes->attributeItem(i);
401        RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
402        newAttributes->addAttribute(clone);
403    }
404    return newAttributes.release();
405}
406
407}
408
409PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
410{
411    // FIXME: This method is wrong.  We should be using the original token.
412    // Using an Element* causes us to fail examples like this:
413    // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
414    // When reconstructTheActiveFormattingElements calls this method to open
415    // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
416    // spec implies it should be "1".  Minefield matches the HTML5 spec here.
417
418    ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
419    AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
420    return createHTMLElement(fakeToken);
421}
422
423bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
424{
425    if (m_activeFormattingElements.isEmpty())
426        return false;
427    unsigned index = m_activeFormattingElements.size();
428    do {
429        --index;
430        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
431        if (entry.isMarker() || m_openElements.contains(entry.element())) {
432            firstUnopenElementIndex = index + 1;
433            return firstUnopenElementIndex < m_activeFormattingElements.size();
434        }
435    } while (index);
436    firstUnopenElementIndex = index;
437    return true;
438}
439
440void HTMLConstructionSite::reconstructTheActiveFormattingElements()
441{
442    unsigned firstUnopenElementIndex;
443    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
444        return;
445
446    unsigned unopenEntryIndex = firstUnopenElementIndex;
447    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
448    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
449        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
450        RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
451        m_openElements.push(attachToCurrent(reconstructed.release()));
452        unopenedEntry.replaceElement(currentElement());
453    }
454}
455
456void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
457{
458    while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
459        m_openElements.pop();
460}
461
462void HTMLConstructionSite::generateImpliedEndTags()
463{
464    while (hasImpliedEndTag(currentNode()))
465        m_openElements.pop();
466}
467
468void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
469{
470    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
471    if (lastTableElementRecord) {
472        Element* lastTableElement = lastTableElementRecord->element();
473        if (ContainerNode* parent = lastTableElement->parentNode()) {
474            site.parent = parent;
475            site.nextChild = lastTableElement;
476            return;
477        }
478        site.parent = lastTableElementRecord->next()->element();
479        site.nextChild = 0;
480        return;
481    }
482    // Fragment case
483    site.parent = m_openElements.rootNode(); // DocumentFragment
484    site.nextChild = 0;
485}
486
487bool HTMLConstructionSite::shouldFosterParent() const
488{
489    return m_redirectAttachToFosterParent
490        && currentNode()->isElementNode()
491        && causesFosterParenting(currentElement()->tagQName());
492}
493
494void HTMLConstructionSite::fosterParent(Node* node)
495{
496    AttachmentSite site;
497    findFosterSite(site);
498    attachAtSite(site, node);
499}
500
501}
502