1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "HTMLTreeBuilder.h"
29
30#include "Comment.h"
31#include "DOMWindow.h"
32#include "DocumentFragment.h"
33#include "DocumentType.h"
34#include "Frame.h"
35#include "HTMLDocument.h"
36#include "HTMLDocumentParser.h"
37#include "HTMLElementFactory.h"
38#include "HTMLFormElement.h"
39#include "HTMLHtmlElement.h"
40#include "HTMLNames.h"
41#include "HTMLParserIdioms.h"
42#include "HTMLScriptElement.h"
43#include "HTMLToken.h"
44#include "HTMLTokenizer.h"
45#include "LocalizedStrings.h"
46#include "MathMLNames.h"
47#include "NotImplemented.h"
48#include "SVGNames.h"
49#include "ScriptController.h"
50#include "Text.h"
51#include "XLinkNames.h"
52#include "XMLNSNames.h"
53#include "XMLNames.h"
54#include <wtf/unicode/CharacterNames.h>
55
56namespace WebCore {
57
58using namespace HTMLNames;
59
60static const int uninitializedLineNumberValue = -1;
61
62static TextPosition1 uninitializedPositionValue1()
63{
64    return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
65}
66
67namespace {
68
69inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
70{
71    return isHTMLSpace(character) || character == replacementCharacter;
72}
73
74inline bool isAllWhitespace(const String& string)
75{
76    return string.isAllSpecialCharacters<isHTMLSpace>();
77}
78
79inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
80{
81    return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
82}
83
84bool isNumberedHeaderTag(const AtomicString& tagName)
85{
86    return tagName == h1Tag
87        || tagName == h2Tag
88        || tagName == h3Tag
89        || tagName == h4Tag
90        || tagName == h5Tag
91        || tagName == h6Tag;
92}
93
94bool isCaptionColOrColgroupTag(const AtomicString& tagName)
95{
96    return tagName == captionTag
97        || tagName == colTag
98        || tagName == colgroupTag;
99}
100
101bool isTableCellContextTag(const AtomicString& tagName)
102{
103    return tagName == thTag || tagName == tdTag;
104}
105
106bool isTableBodyContextTag(const AtomicString& tagName)
107{
108    return tagName == tbodyTag
109        || tagName == tfootTag
110        || tagName == theadTag;
111}
112
113// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
114bool isSpecialNode(Node* node)
115{
116    if (node->hasTagName(MathMLNames::miTag)
117        || node->hasTagName(MathMLNames::moTag)
118        || node->hasTagName(MathMLNames::mnTag)
119        || node->hasTagName(MathMLNames::msTag)
120        || node->hasTagName(MathMLNames::mtextTag)
121        || node->hasTagName(MathMLNames::annotation_xmlTag)
122        || node->hasTagName(SVGNames::foreignObjectTag)
123        || node->hasTagName(SVGNames::descTag)
124        || node->hasTagName(SVGNames::titleTag))
125        return true;
126    if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
127        return true;
128    if (!isInHTMLNamespace(node))
129        return false;
130    const AtomicString& tagName = node->localName();
131    return tagName == addressTag
132        || tagName == appletTag
133        || tagName == areaTag
134        || tagName == articleTag
135        || tagName == asideTag
136        || tagName == baseTag
137        || tagName == basefontTag
138        || tagName == bgsoundTag
139        || tagName == blockquoteTag
140        || tagName == bodyTag
141        || tagName == brTag
142        || tagName == buttonTag
143        || tagName == captionTag
144        || tagName == centerTag
145        || tagName == colTag
146        || tagName == colgroupTag
147        || tagName == commandTag
148        || tagName == ddTag
149        || tagName == detailsTag
150        || tagName == dirTag
151        || tagName == divTag
152        || tagName == dlTag
153        || tagName == dtTag
154        || tagName == embedTag
155        || tagName == fieldsetTag
156        || tagName == figcaptionTag
157        || tagName == figureTag
158        || tagName == footerTag
159        || tagName == formTag
160        || tagName == frameTag
161        || tagName == framesetTag
162        || isNumberedHeaderTag(tagName)
163        || tagName == headTag
164        || tagName == headerTag
165        || tagName == hgroupTag
166        || tagName == hrTag
167        || tagName == htmlTag
168        || tagName == iframeTag
169        || tagName == imgTag
170        || tagName == inputTag
171        || tagName == isindexTag
172        || tagName == liTag
173        || tagName == linkTag
174        || tagName == listingTag
175        || tagName == marqueeTag
176        || tagName == menuTag
177        || tagName == metaTag
178        || tagName == navTag
179        || tagName == noembedTag
180        || tagName == noframesTag
181        || tagName == noscriptTag
182        || tagName == objectTag
183        || tagName == olTag
184        || tagName == pTag
185        || tagName == paramTag
186        || tagName == plaintextTag
187        || tagName == preTag
188        || tagName == scriptTag
189        || tagName == sectionTag
190        || tagName == selectTag
191        || tagName == styleTag
192        || tagName == summaryTag
193        || tagName == tableTag
194        || isTableBodyContextTag(tagName)
195        || tagName == tdTag
196        || tagName == textareaTag
197        || tagName == thTag
198        || tagName == titleTag
199        || tagName == trTag
200        || tagName == ulTag
201        || tagName == wbrTag
202        || tagName == xmpTag;
203}
204
205bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
206{
207    return tagName == bTag
208        || tagName == bigTag
209        || tagName == codeTag
210        || tagName == emTag
211        || tagName == fontTag
212        || tagName == iTag
213        || tagName == sTag
214        || tagName == smallTag
215        || tagName == strikeTag
216        || tagName == strongTag
217        || tagName == ttTag
218        || tagName == uTag;
219}
220
221bool isNonAnchorFormattingTag(const AtomicString& tagName)
222{
223    return tagName == nobrTag
224        || isNonAnchorNonNobrFormattingTag(tagName);
225}
226
227// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
228bool isFormattingTag(const AtomicString& tagName)
229{
230    return tagName == aTag || isNonAnchorFormattingTag(tagName);
231}
232
233HTMLFormElement* closestFormAncestor(Element* element)
234{
235    while (element) {
236        if (element->hasTagName(formTag))
237            return static_cast<HTMLFormElement*>(element);
238        ContainerNode* parent = element->parentNode();
239        if (!parent || !parent->isElementNode())
240            return 0;
241        element = static_cast<Element*>(parent);
242    }
243    return 0;
244}
245
246} // namespace
247
248class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
249    WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
250public:
251    explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
252        : m_current(token.characters().data())
253        , m_end(m_current + token.characters().size())
254    {
255        ASSERT(!isEmpty());
256    }
257
258    explicit ExternalCharacterTokenBuffer(const String& string)
259        : m_current(string.characters())
260        , m_end(m_current + string.length())
261    {
262        ASSERT(!isEmpty());
263    }
264
265    ~ExternalCharacterTokenBuffer()
266    {
267        ASSERT(isEmpty());
268    }
269
270    bool isEmpty() const { return m_current == m_end; }
271
272    void skipLeadingWhitespace()
273    {
274        skipLeading<isHTMLSpace>();
275    }
276
277    String takeLeadingWhitespace()
278    {
279        return takeLeading<isHTMLSpace>();
280    }
281
282    String takeLeadingNonWhitespace()
283    {
284        return takeLeading<isNotHTMLSpace>();
285    }
286
287    String takeRemaining()
288    {
289        ASSERT(!isEmpty());
290        const UChar* start = m_current;
291        m_current = m_end;
292        return String(start, m_current - start);
293    }
294
295    void giveRemainingTo(Vector<UChar>& recipient)
296    {
297        recipient.append(m_current, m_end - m_current);
298        m_current = m_end;
299    }
300
301    String takeRemainingWhitespace()
302    {
303        ASSERT(!isEmpty());
304        Vector<UChar> whitespace;
305        do {
306            UChar cc = *m_current++;
307            if (isHTMLSpace(cc))
308                whitespace.append(cc);
309        } while (m_current < m_end);
310        // Returning the null string when there aren't any whitespace
311        // characters is slightly cleaner semantically because we don't want
312        // to insert a text node (as opposed to inserting an empty text node).
313        if (whitespace.isEmpty())
314            return String();
315        return String::adopt(whitespace);
316    }
317
318private:
319    template<bool characterPredicate(UChar)>
320    void skipLeading()
321    {
322        ASSERT(!isEmpty());
323        while (characterPredicate(*m_current)) {
324            if (++m_current == m_end)
325                return;
326        }
327    }
328
329    template<bool characterPredicate(UChar)>
330    String takeLeading()
331    {
332        ASSERT(!isEmpty());
333        const UChar* start = m_current;
334        skipLeading<characterPredicate>();
335        if (start == m_current)
336            return String();
337        return String(start, m_current - start);
338    }
339
340    const UChar* m_current;
341    const UChar* m_end;
342};
343
344
345HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
346    : m_framesetOk(true)
347    , m_document(document)
348    , m_tree(document)
349    , m_reportErrors(reportErrors)
350    , m_isPaused(false)
351    , m_insertionMode(InitialMode)
352    , m_originalInsertionMode(InitialMode)
353    , m_parser(parser)
354    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
355    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
356    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
357    , m_hasPendingForeignInsertionModeSteps(false)
358{
359}
360
361// FIXME: Member variables should be grouped into self-initializing structs to
362// minimize code duplication between these constructors.
363HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
364    : m_framesetOk(true)
365    , m_fragmentContext(fragment, contextElement, scriptingPermission)
366    , m_document(fragment->document())
367    , m_tree(fragment, scriptingPermission)
368    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
369    , m_isPaused(false)
370    , m_insertionMode(InitialMode)
371    , m_originalInsertionMode(InitialMode)
372    , m_parser(parser)
373    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
374    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
375    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
376    , m_hasPendingForeignInsertionModeSteps(false)
377{
378    if (contextElement) {
379        // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
380        // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
381        // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
382        // and instead use the DocumentFragment as a root node.
383        m_tree.openElements()->pushRootNode(fragment);
384        resetInsertionModeAppropriately();
385        m_tree.setForm(closestFormAncestor(contextElement));
386    }
387}
388
389HTMLTreeBuilder::~HTMLTreeBuilder()
390{
391}
392
393void HTMLTreeBuilder::detach()
394{
395    // This call makes little sense in fragment mode, but for consistency
396    // DocumentParser expects detach() to always be called before it's destroyed.
397    m_document = 0;
398    // HTMLConstructionSite might be on the callstack when detach() is called
399    // otherwise we'd just call m_tree.clear() here instead.
400    m_tree.detach();
401}
402
403HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
404    : m_fragment(0)
405    , m_contextElement(0)
406    , m_scriptingPermission(FragmentScriptingAllowed)
407{
408}
409
410HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
411    : m_fragment(fragment)
412    , m_contextElement(contextElement)
413    , m_scriptingPermission(scriptingPermission)
414{
415    ASSERT(!fragment->hasChildNodes());
416}
417
418HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
419{
420}
421
422PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
423{
424    // Unpause ourselves, callers may pause us again when processing the script.
425    // The HTML5 spec is written as though scripts are executed inside the tree
426    // builder.  We pause the parser to exit the tree builder, and then resume
427    // before running scripts.
428    m_isPaused = false;
429    scriptStartPosition = m_scriptToProcessStartPosition;
430    m_scriptToProcessStartPosition = uninitializedPositionValue1();
431    return m_scriptToProcess.release();
432}
433
434void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
435{
436    AtomicHTMLToken token(rawToken);
437
438    // We clear the rawToken in case constructTreeFromAtomicToken
439    // synchronously re-enters the parser. We don't clear the token immedately
440    // for Character tokens because the AtomicHTMLToken avoids copying the
441    // characters by keeping a pointer to the underlying buffer in the
442    // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
443    // the parser.
444    //
445    // FIXME: Top clearing the rawToken once we start running the parser off
446    // the main thread or once we stop allowing synchronous JavaScript
447    // execution from parseMappedAttribute.
448    if (rawToken.type() != HTMLToken::Character)
449        rawToken.clear();
450
451    constructTreeFromAtomicToken(token);
452
453    if (!rawToken.isUninitialized()) {
454        ASSERT(rawToken.type() == HTMLToken::Character);
455        rawToken.clear();
456    }
457}
458
459void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
460{
461    processToken(token);
462
463    // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
464    // the U+0000 characters into replacement characters has compatibility
465    // problems.
466    m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
467    m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && !isInHTMLNamespace(m_tree.currentNode()));
468}
469
470void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
471{
472    switch (token.type()) {
473    case HTMLToken::Uninitialized:
474        ASSERT_NOT_REACHED();
475        break;
476    case HTMLToken::DOCTYPE:
477        processDoctypeToken(token);
478        break;
479    case HTMLToken::StartTag:
480        processStartTag(token);
481        break;
482    case HTMLToken::EndTag:
483        processEndTag(token);
484        break;
485    case HTMLToken::Comment:
486        processComment(token);
487        return;
488    case HTMLToken::Character:
489        processCharacter(token);
490        break;
491    case HTMLToken::EndOfFile:
492        processEndOfFile(token);
493        break;
494    }
495}
496
497void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
498{
499    ASSERT(token.type() == HTMLToken::DOCTYPE);
500    if (m_insertionMode == InitialMode) {
501        m_tree.insertDoctype(token);
502        setInsertionMode(BeforeHTMLMode);
503        return;
504    }
505    if (m_insertionMode == InTableTextMode) {
506        defaultForInTableText();
507        processDoctypeToken(token);
508        return;
509    }
510    parseError(token);
511}
512
513void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
514{
515    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
516    AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
517    processStartTag(fakeToken);
518}
519
520void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
521{
522    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
523    AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
524    processEndTag(fakeToken);
525}
526
527void HTMLTreeBuilder::processFakeCharacters(const String& characters)
528{
529    ASSERT(!characters.isEmpty());
530    ExternalCharacterTokenBuffer buffer(characters);
531    processCharacterBuffer(buffer);
532}
533
534void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
535{
536    if (!m_tree.openElements()->inButtonScope(pTag.localName()))
537        return;
538    AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
539    processEndTag(endP);
540}
541
542PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
543{
544    RefPtr<NamedNodeMap> attributes = token.takeAtributes();
545    if (!attributes)
546        attributes = NamedNodeMap::create();
547    else {
548        attributes->removeAttribute(nameAttr);
549        attributes->removeAttribute(actionAttr);
550        attributes->removeAttribute(promptAttr);
551    }
552
553    RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
554    attributes->insertAttribute(mappedAttribute.release(), false);
555    return attributes.release();
556}
557
558void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
559{
560    ASSERT(token.type() == HTMLToken::StartTag);
561    ASSERT(token.name() == isindexTag);
562    parseError(token);
563    if (m_tree.form())
564        return;
565    notImplemented(); // Acknowledge self-closing flag
566    processFakeStartTag(formTag);
567    RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
568    if (actionAttribute) {
569        ASSERT(m_tree.currentElement()->hasTagName(formTag));
570        m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
571    }
572    processFakeStartTag(hrTag);
573    processFakeStartTag(labelTag);
574    RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
575    if (promptAttribute)
576        processFakeCharacters(promptAttribute->value());
577    else
578        processFakeCharacters(searchableIndexIntroduction());
579    processFakeStartTag(inputTag, attributesForIsindexInput(token));
580    notImplemented(); // This second set of characters may be needed by non-english locales.
581    processFakeEndTag(labelTag);
582    processFakeStartTag(hrTag);
583    processFakeEndTag(formTag);
584}
585
586namespace {
587
588bool isLi(const ContainerNode* element)
589{
590    return element->hasTagName(liTag);
591}
592
593bool isDdOrDt(const ContainerNode* element)
594{
595    return element->hasTagName(ddTag)
596        || element->hasTagName(dtTag);
597}
598
599}
600
601template <bool shouldClose(const ContainerNode*)>
602void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
603{
604    m_framesetOk = false;
605    HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
606    while (1) {
607        ContainerNode* node = nodeRecord->node();
608        if (shouldClose(node)) {
609            ASSERT(node->isElementNode());
610            processFakeEndTag(toElement(node)->tagQName());
611            break;
612        }
613        if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
614            break;
615        nodeRecord = nodeRecord->next();
616    }
617    processFakePEndTagIfPInButtonScope();
618    m_tree.insertHTMLElement(token);
619}
620
621namespace {
622
623typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
624
625void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
626{
627    for (size_t i = 0; i < length; ++i) {
628        const QualifiedName& name = *names[i];
629        const AtomicString& localName = name.localName();
630        AtomicString loweredLocalName = localName.lower();
631        if (loweredLocalName != localName)
632            map->add(loweredLocalName, name);
633    }
634}
635
636void adjustSVGTagNameCase(AtomicHTMLToken& token)
637{
638    static PrefixedNameToQualifiedNameMap* caseMap = 0;
639    if (!caseMap) {
640        caseMap = new PrefixedNameToQualifiedNameMap;
641        size_t length = 0;
642        QualifiedName** svgTags = SVGNames::getSVGTags(&length);
643        mapLoweredLocalNameToName(caseMap, svgTags, length);
644    }
645
646    const QualifiedName& casedName = caseMap->get(token.name());
647    if (casedName.localName().isNull())
648        return;
649    token.setName(casedName.localName());
650}
651
652template<QualifiedName** getAttrs(size_t* length)>
653void adjustAttributes(AtomicHTMLToken& token)
654{
655    static PrefixedNameToQualifiedNameMap* caseMap = 0;
656    if (!caseMap) {
657        caseMap = new PrefixedNameToQualifiedNameMap;
658        size_t length = 0;
659        QualifiedName** attrs = getAttrs(&length);
660        mapLoweredLocalNameToName(caseMap, attrs, length);
661    }
662
663    NamedNodeMap* attributes = token.attributes();
664    if (!attributes)
665        return;
666
667    for (unsigned x = 0; x < attributes->length(); ++x) {
668        Attribute* attribute = attributes->attributeItem(x);
669        const QualifiedName& casedName = caseMap->get(attribute->localName());
670        if (!casedName.localName().isNull())
671            attribute->parserSetName(casedName);
672    }
673}
674
675void adjustSVGAttributes(AtomicHTMLToken& token)
676{
677    adjustAttributes<SVGNames::getSVGAttrs>(token);
678}
679
680void adjustMathMLAttributes(AtomicHTMLToken& token)
681{
682    adjustAttributes<MathMLNames::getMathMLAttrs>(token);
683}
684
685void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
686{
687    for (size_t i = 0; i < length; ++i) {
688        QualifiedName* name = names[i];
689        const AtomicString& localName = name->localName();
690        AtomicString prefixColonLocalName(prefix + ":" + localName);
691        QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
692        map->add(prefixColonLocalName, nameWithPrefix);
693    }
694}
695
696void adjustForeignAttributes(AtomicHTMLToken& token)
697{
698    static PrefixedNameToQualifiedNameMap* map = 0;
699    if (!map) {
700        map = new PrefixedNameToQualifiedNameMap;
701        size_t length = 0;
702        QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
703        addNamesWithPrefix(map, "xlink", attrs, length);
704
705        attrs = XMLNames::getXMLAttrs(&length);
706        addNamesWithPrefix(map, "xml", attrs, length);
707
708        map->add("xmlns", XMLNSNames::xmlnsAttr);
709        map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
710    }
711
712    NamedNodeMap* attributes = token.attributes();
713    if (!attributes)
714        return;
715
716    for (unsigned x = 0; x < attributes->length(); ++x) {
717        Attribute* attribute = attributes->attributeItem(x);
718        const QualifiedName& name = map->get(attribute->localName());
719        if (!name.localName().isNull())
720            attribute->parserSetName(name);
721    }
722}
723
724}
725
726void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
727{
728    ASSERT(token.type() == HTMLToken::StartTag);
729    if (token.name() == htmlTag) {
730        m_tree.insertHTMLHtmlStartTagInBody(token);
731        return;
732    }
733    if (token.name() == baseTag
734        || token.name() == basefontTag
735        || token.name() == bgsoundTag
736        || token.name() == commandTag
737        || token.name() == linkTag
738        || token.name() == metaTag
739        || token.name() == noframesTag
740        || token.name() == scriptTag
741        || token.name() == styleTag
742        || token.name() == titleTag) {
743        bool didProcess = processStartTagForInHead(token);
744        ASSERT_UNUSED(didProcess, didProcess);
745        return;
746    }
747    if (token.name() == bodyTag) {
748        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
749            ASSERT(isParsingFragment());
750            return;
751        }
752        m_tree.insertHTMLBodyStartTagInBody(token);
753        return;
754    }
755    if (token.name() == framesetTag) {
756        parseError(token);
757        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
758            ASSERT(isParsingFragment());
759            return;
760        }
761        if (!m_framesetOk)
762            return;
763        ExceptionCode ec = 0;
764        m_tree.openElements()->bodyElement()->remove(ec);
765        ASSERT(!ec);
766        m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
767        m_tree.openElements()->popHTMLBodyElement();
768        ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
769        m_tree.insertHTMLElement(token);
770        setInsertionMode(InFramesetMode);
771        return;
772    }
773    if (token.name() == addressTag
774        || token.name() == articleTag
775        || token.name() == asideTag
776        || token.name() == blockquoteTag
777        || token.name() == centerTag
778        || token.name() == detailsTag
779        || token.name() == dirTag
780        || token.name() == divTag
781        || token.name() == dlTag
782        || token.name() == fieldsetTag
783        || token.name() == figcaptionTag
784        || token.name() == figureTag
785        || token.name() == footerTag
786        || token.name() == headerTag
787        || token.name() == hgroupTag
788        || token.name() == menuTag
789        || token.name() == navTag
790        || token.name() == olTag
791        || token.name() == pTag
792        || token.name() == sectionTag
793        || token.name() == summaryTag
794        || token.name() == ulTag) {
795        processFakePEndTagIfPInButtonScope();
796        m_tree.insertHTMLElement(token);
797        return;
798    }
799    if (isNumberedHeaderTag(token.name())) {
800        processFakePEndTagIfPInButtonScope();
801        if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
802            parseError(token);
803            m_tree.openElements()->pop();
804        }
805        m_tree.insertHTMLElement(token);
806        return;
807    }
808    if (token.name() == preTag || token.name() == listingTag) {
809        processFakePEndTagIfPInButtonScope();
810        m_tree.insertHTMLElement(token);
811        m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
812        m_framesetOk = false;
813        return;
814    }
815    if (token.name() == formTag) {
816        if (m_tree.form()) {
817            parseError(token);
818            return;
819        }
820        processFakePEndTagIfPInButtonScope();
821        m_tree.insertHTMLFormElement(token);
822        return;
823    }
824    if (token.name() == liTag) {
825        processCloseWhenNestedTag<isLi>(token);
826        return;
827    }
828    if (token.name() == ddTag || token.name() == dtTag) {
829        processCloseWhenNestedTag<isDdOrDt>(token);
830        return;
831    }
832    if (token.name() == plaintextTag) {
833        processFakePEndTagIfPInButtonScope();
834        m_tree.insertHTMLElement(token);
835        m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
836        return;
837    }
838    if (token.name() == buttonTag) {
839        if (m_tree.openElements()->inScope(buttonTag)) {
840            parseError(token);
841            processFakeEndTag(buttonTag);
842            reprocessStartTag(token); // FIXME: Could we just fall through here?
843            return;
844        }
845        m_tree.reconstructTheActiveFormattingElements();
846        m_tree.insertHTMLElement(token);
847        m_framesetOk = false;
848        return;
849    }
850    if (token.name() == aTag) {
851        Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
852        if (activeATag) {
853            parseError(token);
854            processFakeEndTag(aTag);
855            m_tree.activeFormattingElements()->remove(activeATag);
856            if (m_tree.openElements()->contains(activeATag))
857                m_tree.openElements()->remove(activeATag);
858        }
859        m_tree.reconstructTheActiveFormattingElements();
860        m_tree.insertFormattingElement(token);
861        return;
862    }
863    if (isNonAnchorNonNobrFormattingTag(token.name())) {
864        m_tree.reconstructTheActiveFormattingElements();
865        m_tree.insertFormattingElement(token);
866        return;
867    }
868    if (token.name() == nobrTag) {
869        m_tree.reconstructTheActiveFormattingElements();
870        if (m_tree.openElements()->inScope(nobrTag)) {
871            parseError(token);
872            processFakeEndTag(nobrTag);
873            m_tree.reconstructTheActiveFormattingElements();
874        }
875        m_tree.insertFormattingElement(token);
876        return;
877    }
878    if (token.name() == appletTag
879        || token.name() == marqueeTag
880        || token.name() == objectTag) {
881        m_tree.reconstructTheActiveFormattingElements();
882        m_tree.insertHTMLElement(token);
883        m_tree.activeFormattingElements()->appendMarker();
884        m_framesetOk = false;
885        return;
886    }
887    if (token.name() == tableTag) {
888        if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
889            processFakeEndTag(pTag);
890        m_tree.insertHTMLElement(token);
891        m_framesetOk = false;
892        setInsertionMode(InTableMode);
893        return;
894    }
895    if (token.name() == imageTag) {
896        parseError(token);
897        // Apparently we're not supposed to ask.
898        token.setName(imgTag.localName());
899        prepareToReprocessToken();
900        // Note the fall through to the imgTag handling below!
901    }
902    if (token.name() == areaTag
903        || token.name() == brTag
904        || token.name() == embedTag
905        || token.name() == imgTag
906        || token.name() == keygenTag
907        || token.name() == wbrTag) {
908        m_tree.reconstructTheActiveFormattingElements();
909        m_tree.insertSelfClosingHTMLElement(token);
910        m_framesetOk = false;
911        return;
912    }
913    if (token.name() == inputTag) {
914        RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
915        m_tree.reconstructTheActiveFormattingElements();
916        m_tree.insertSelfClosingHTMLElement(token);
917        if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
918            m_framesetOk = false;
919        return;
920    }
921    if (token.name() == paramTag
922        || token.name() == sourceTag
923        || token.name() == trackTag) {
924        m_tree.insertSelfClosingHTMLElement(token);
925        return;
926    }
927    if (token.name() == hrTag) {
928        processFakePEndTagIfPInButtonScope();
929        m_tree.insertSelfClosingHTMLElement(token);
930        m_framesetOk = false;
931        return;
932    }
933    if (token.name() == isindexTag) {
934        processIsindexStartTagForInBody(token);
935        return;
936    }
937    if (token.name() == textareaTag) {
938        m_tree.insertHTMLElement(token);
939        m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
940        m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
941        m_originalInsertionMode = m_insertionMode;
942        m_framesetOk = false;
943        setInsertionMode(TextMode);
944        return;
945    }
946    if (token.name() == xmpTag) {
947        processFakePEndTagIfPInButtonScope();
948        m_tree.reconstructTheActiveFormattingElements();
949        m_framesetOk = false;
950        processGenericRawTextStartTag(token);
951        return;
952    }
953    if (token.name() == iframeTag) {
954        m_framesetOk = false;
955        processGenericRawTextStartTag(token);
956        return;
957    }
958    if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
959        processGenericRawTextStartTag(token);
960        return;
961    }
962    if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
963        processGenericRawTextStartTag(token);
964        return;
965    }
966    if (token.name() == selectTag) {
967        m_tree.reconstructTheActiveFormattingElements();
968        m_tree.insertHTMLElement(token);
969        m_framesetOk = false;
970        if (m_insertionMode == InTableMode
971             || m_insertionMode == InCaptionMode
972             || m_insertionMode == InColumnGroupMode
973             || m_insertionMode == InTableBodyMode
974             || m_insertionMode == InRowMode
975             || m_insertionMode == InCellMode)
976            setInsertionMode(InSelectInTableMode);
977        else
978            setInsertionMode(InSelectMode);
979        return;
980    }
981    if (token.name() == optgroupTag || token.name() == optionTag) {
982        if (m_tree.openElements()->inScope(optionTag.localName())) {
983            AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
984            processEndTag(endOption);
985        }
986        m_tree.reconstructTheActiveFormattingElements();
987        m_tree.insertHTMLElement(token);
988        return;
989    }
990    if (token.name() == rpTag || token.name() == rtTag) {
991        if (m_tree.openElements()->inScope(rubyTag.localName())) {
992            m_tree.generateImpliedEndTags();
993            if (!m_tree.currentNode()->hasTagName(rubyTag)) {
994                parseError(token);
995                m_tree.openElements()->popUntil(rubyTag.localName());
996            }
997        }
998        m_tree.insertHTMLElement(token);
999        return;
1000    }
1001    if (token.name() == MathMLNames::mathTag.localName()) {
1002        m_tree.reconstructTheActiveFormattingElements();
1003        adjustMathMLAttributes(token);
1004        adjustForeignAttributes(token);
1005        m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1006        if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1007            setInsertionMode(InForeignContentMode);
1008        return;
1009    }
1010    if (token.name() == SVGNames::svgTag.localName()) {
1011        m_tree.reconstructTheActiveFormattingElements();
1012        adjustSVGAttributes(token);
1013        adjustForeignAttributes(token);
1014        m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1015        if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1016            setInsertionMode(InForeignContentMode);
1017        return;
1018    }
1019    if (isCaptionColOrColgroupTag(token.name())
1020        || token.name() == frameTag
1021        || token.name() == headTag
1022        || isTableBodyContextTag(token.name())
1023        || isTableCellContextTag(token.name())
1024        || token.name() == trTag) {
1025        parseError(token);
1026        return;
1027    }
1028    m_tree.reconstructTheActiveFormattingElements();
1029    m_tree.insertHTMLElement(token);
1030}
1031
1032bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1033{
1034    if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1035        ASSERT(isParsingFragment());
1036        // FIXME: parse error
1037        return false;
1038    }
1039    m_tree.openElements()->pop();
1040    setInsertionMode(InTableMode);
1041    return true;
1042}
1043
1044// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
1045void HTMLTreeBuilder::closeTheCell()
1046{
1047    ASSERT(insertionMode() == InCellMode);
1048    if (m_tree.openElements()->inTableScope(tdTag)) {
1049        ASSERT(!m_tree.openElements()->inTableScope(thTag));
1050        processFakeEndTag(tdTag);
1051        return;
1052    }
1053    ASSERT(m_tree.openElements()->inTableScope(thTag));
1054    processFakeEndTag(thTag);
1055    ASSERT(insertionMode() == InRowMode);
1056}
1057
1058void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1059{
1060    ASSERT(token.type() == HTMLToken::StartTag);
1061    if (token.name() == captionTag) {
1062        m_tree.openElements()->popUntilTableScopeMarker();
1063        m_tree.activeFormattingElements()->appendMarker();
1064        m_tree.insertHTMLElement(token);
1065        setInsertionMode(InCaptionMode);
1066        return;
1067    }
1068    if (token.name() == colgroupTag) {
1069        m_tree.openElements()->popUntilTableScopeMarker();
1070        m_tree.insertHTMLElement(token);
1071        setInsertionMode(InColumnGroupMode);
1072        return;
1073    }
1074    if (token.name() == colTag) {
1075        processFakeStartTag(colgroupTag);
1076        ASSERT(InColumnGroupMode);
1077        reprocessStartTag(token);
1078        return;
1079    }
1080    if (isTableBodyContextTag(token.name())) {
1081        m_tree.openElements()->popUntilTableScopeMarker();
1082        m_tree.insertHTMLElement(token);
1083        setInsertionMode(InTableBodyMode);
1084        return;
1085    }
1086    if (isTableCellContextTag(token.name())
1087        || token.name() == trTag) {
1088        processFakeStartTag(tbodyTag);
1089        ASSERT(insertionMode() == InTableBodyMode);
1090        reprocessStartTag(token);
1091        return;
1092    }
1093    if (token.name() == tableTag) {
1094        parseError(token);
1095        if (!processTableEndTagForInTable()) {
1096            ASSERT(isParsingFragment());
1097            return;
1098        }
1099        reprocessStartTag(token);
1100        return;
1101    }
1102    if (token.name() == styleTag || token.name() == scriptTag) {
1103        processStartTagForInHead(token);
1104        return;
1105    }
1106    if (token.name() == inputTag) {
1107        Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1108        if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1109            parseError(token);
1110            m_tree.insertSelfClosingHTMLElement(token);
1111            return;
1112        }
1113        // Fall through to "anything else" case.
1114    }
1115    if (token.name() == formTag) {
1116        parseError(token);
1117        if (m_tree.form())
1118            return;
1119        m_tree.insertHTMLFormElement(token, true);
1120        m_tree.openElements()->pop();
1121        return;
1122    }
1123    parseError(token);
1124    HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1125    processStartTagForInBody(token);
1126}
1127
1128namespace {
1129
1130bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, ContainerNode* currentElement)
1131{
1132    ASSERT(token.type() == HTMLToken::StartTag);
1133    if (currentElement->hasTagName(MathMLNames::miTag)
1134        || currentElement->hasTagName(MathMLNames::moTag)
1135        || currentElement->hasTagName(MathMLNames::mnTag)
1136        || currentElement->hasTagName(MathMLNames::msTag)
1137        || currentElement->hasTagName(MathMLNames::mtextTag)) {
1138        return token.name() != MathMLNames::mglyphTag
1139            && token.name() != MathMLNames::malignmarkTag;
1140    }
1141    if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1142        return token.name() == SVGNames::svgTag;
1143    if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1144        || currentElement->hasTagName(SVGNames::descTag)
1145        || currentElement->hasTagName(SVGNames::titleTag))
1146        return true;
1147    return isInHTMLNamespace(currentElement);
1148}
1149
1150}
1151
1152void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1153{
1154    ASSERT(token.type() == HTMLToken::StartTag);
1155    switch (insertionMode()) {
1156    case InitialMode:
1157        ASSERT(insertionMode() == InitialMode);
1158        defaultForInitial();
1159        // Fall through.
1160    case BeforeHTMLMode:
1161        ASSERT(insertionMode() == BeforeHTMLMode);
1162        if (token.name() == htmlTag) {
1163            m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1164            setInsertionMode(BeforeHeadMode);
1165            return;
1166        }
1167        defaultForBeforeHTML();
1168        // Fall through.
1169    case BeforeHeadMode:
1170        ASSERT(insertionMode() == BeforeHeadMode);
1171        if (token.name() == htmlTag) {
1172            m_tree.insertHTMLHtmlStartTagInBody(token);
1173            return;
1174        }
1175        if (token.name() == headTag) {
1176            m_tree.insertHTMLHeadElement(token);
1177            setInsertionMode(InHeadMode);
1178            return;
1179        }
1180        defaultForBeforeHead();
1181        // Fall through.
1182    case InHeadMode:
1183        ASSERT(insertionMode() == InHeadMode);
1184        if (processStartTagForInHead(token))
1185            return;
1186        defaultForInHead();
1187        // Fall through.
1188    case AfterHeadMode:
1189        ASSERT(insertionMode() == AfterHeadMode);
1190        if (token.name() == htmlTag) {
1191            m_tree.insertHTMLHtmlStartTagInBody(token);
1192            return;
1193        }
1194        if (token.name() == bodyTag) {
1195            m_framesetOk = false;
1196            m_tree.insertHTMLBodyElement(token);
1197            setInsertionMode(InBodyMode);
1198            return;
1199        }
1200        if (token.name() == framesetTag) {
1201            m_tree.insertHTMLElement(token);
1202            setInsertionMode(InFramesetMode);
1203            return;
1204        }
1205        if (token.name() == baseTag
1206            || token.name() == basefontTag
1207            || token.name() == bgsoundTag
1208            || token.name() == linkTag
1209            || token.name() == metaTag
1210            || token.name() == noframesTag
1211            || token.name() == scriptTag
1212            || token.name() == styleTag
1213            || token.name() == titleTag) {
1214            parseError(token);
1215            ASSERT(m_tree.head());
1216            m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1217            processStartTagForInHead(token);
1218            m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1219            return;
1220        }
1221        if (token.name() == headTag) {
1222            parseError(token);
1223            return;
1224        }
1225        defaultForAfterHead();
1226        // Fall through
1227    case InBodyMode:
1228        ASSERT(insertionMode() == InBodyMode);
1229        processStartTagForInBody(token);
1230        break;
1231    case InTableMode:
1232        ASSERT(insertionMode() == InTableMode);
1233        processStartTagForInTable(token);
1234        break;
1235    case InCaptionMode:
1236        ASSERT(insertionMode() == InCaptionMode);
1237        if (isCaptionColOrColgroupTag(token.name())
1238            || isTableBodyContextTag(token.name())
1239            || isTableCellContextTag(token.name())
1240            || token.name() == trTag) {
1241            parseError(token);
1242            if (!processCaptionEndTagForInCaption()) {
1243                ASSERT(isParsingFragment());
1244                return;
1245            }
1246            reprocessStartTag(token);
1247            return;
1248        }
1249        processStartTagForInBody(token);
1250        break;
1251    case InColumnGroupMode:
1252        ASSERT(insertionMode() == InColumnGroupMode);
1253        if (token.name() == htmlTag) {
1254            m_tree.insertHTMLHtmlStartTagInBody(token);
1255            return;
1256        }
1257        if (token.name() == colTag) {
1258            m_tree.insertSelfClosingHTMLElement(token);
1259            return;
1260        }
1261        if (!processColgroupEndTagForInColumnGroup()) {
1262            ASSERT(isParsingFragment());
1263            return;
1264        }
1265        reprocessStartTag(token);
1266        break;
1267    case InTableBodyMode:
1268        ASSERT(insertionMode() == InTableBodyMode);
1269        if (token.name() == trTag) {
1270            m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1271            m_tree.insertHTMLElement(token);
1272            setInsertionMode(InRowMode);
1273            return;
1274        }
1275        if (isTableCellContextTag(token.name())) {
1276            parseError(token);
1277            processFakeStartTag(trTag);
1278            ASSERT(insertionMode() == InRowMode);
1279            reprocessStartTag(token);
1280            return;
1281        }
1282        if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1283            // FIXME: This is slow.
1284            if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1285                ASSERT(isParsingFragment());
1286                parseError(token);
1287                return;
1288            }
1289            m_tree.openElements()->popUntilTableBodyScopeMarker();
1290            ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1291            processFakeEndTag(m_tree.currentElement()->tagQName());
1292            reprocessStartTag(token);
1293            return;
1294        }
1295        processStartTagForInTable(token);
1296        break;
1297    case InRowMode:
1298        ASSERT(insertionMode() == InRowMode);
1299        if (isTableCellContextTag(token.name())) {
1300            m_tree.openElements()->popUntilTableRowScopeMarker();
1301            m_tree.insertHTMLElement(token);
1302            setInsertionMode(InCellMode);
1303            m_tree.activeFormattingElements()->appendMarker();
1304            return;
1305        }
1306        if (token.name() == trTag
1307            || isCaptionColOrColgroupTag(token.name())
1308            || isTableBodyContextTag(token.name())) {
1309            if (!processTrEndTagForInRow()) {
1310                ASSERT(isParsingFragment());
1311                return;
1312            }
1313            ASSERT(insertionMode() == InTableBodyMode);
1314            reprocessStartTag(token);
1315            return;
1316        }
1317        processStartTagForInTable(token);
1318        break;
1319    case InCellMode:
1320        ASSERT(insertionMode() == InCellMode);
1321        if (isCaptionColOrColgroupTag(token.name())
1322            || isTableCellContextTag(token.name())
1323            || token.name() == trTag
1324            || isTableBodyContextTag(token.name())) {
1325            // FIXME: This could be more efficient.
1326            if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1327                ASSERT(isParsingFragment());
1328                parseError(token);
1329                return;
1330            }
1331            closeTheCell();
1332            reprocessStartTag(token);
1333            return;
1334        }
1335        processStartTagForInBody(token);
1336        break;
1337    case AfterBodyMode:
1338    case AfterAfterBodyMode:
1339        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1340        if (token.name() == htmlTag) {
1341            m_tree.insertHTMLHtmlStartTagInBody(token);
1342            return;
1343        }
1344        setInsertionMode(InBodyMode);
1345        reprocessStartTag(token);
1346        break;
1347    case InHeadNoscriptMode:
1348        ASSERT(insertionMode() == InHeadNoscriptMode);
1349        if (token.name() == htmlTag) {
1350            m_tree.insertHTMLHtmlStartTagInBody(token);
1351            return;
1352        }
1353        if (token.name() == basefontTag
1354            || token.name() == bgsoundTag
1355            || token.name() == linkTag
1356            || token.name() == metaTag
1357            || token.name() == noframesTag
1358            || token.name() == styleTag) {
1359            bool didProcess = processStartTagForInHead(token);
1360            ASSERT_UNUSED(didProcess, didProcess);
1361            return;
1362        }
1363        if (token.name() == htmlTag || token.name() == noscriptTag) {
1364            parseError(token);
1365            return;
1366        }
1367        defaultForInHeadNoscript();
1368        processToken(token);
1369        break;
1370    case InFramesetMode:
1371        ASSERT(insertionMode() == InFramesetMode);
1372        if (token.name() == htmlTag) {
1373            m_tree.insertHTMLHtmlStartTagInBody(token);
1374            return;
1375        }
1376        if (token.name() == framesetTag) {
1377            m_tree.insertHTMLElement(token);
1378            return;
1379        }
1380        if (token.name() == frameTag) {
1381            m_tree.insertSelfClosingHTMLElement(token);
1382            return;
1383        }
1384        if (token.name() == noframesTag) {
1385            processStartTagForInHead(token);
1386            return;
1387        }
1388        parseError(token);
1389        break;
1390    case AfterFramesetMode:
1391    case AfterAfterFramesetMode:
1392        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1393        if (token.name() == htmlTag) {
1394            m_tree.insertHTMLHtmlStartTagInBody(token);
1395            return;
1396        }
1397        if (token.name() == noframesTag) {
1398            processStartTagForInHead(token);
1399            return;
1400        }
1401        parseError(token);
1402        break;
1403    case InSelectInTableMode:
1404        ASSERT(insertionMode() == InSelectInTableMode);
1405        if (token.name() == captionTag
1406            || token.name() == tableTag
1407            || isTableBodyContextTag(token.name())
1408            || token.name() == trTag
1409            || isTableCellContextTag(token.name())) {
1410            parseError(token);
1411            AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1412            processEndTag(endSelect);
1413            reprocessStartTag(token);
1414            return;
1415        }
1416        // Fall through
1417    case InSelectMode:
1418        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1419        if (token.name() == htmlTag) {
1420            m_tree.insertHTMLHtmlStartTagInBody(token);
1421            return;
1422        }
1423        if (token.name() == optionTag) {
1424            if (m_tree.currentNode()->hasTagName(optionTag)) {
1425                AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1426                processEndTag(endOption);
1427            }
1428            m_tree.insertHTMLElement(token);
1429            return;
1430        }
1431        if (token.name() == optgroupTag) {
1432            if (m_tree.currentNode()->hasTagName(optionTag)) {
1433                AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1434                processEndTag(endOption);
1435            }
1436            if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1437                AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1438                processEndTag(endOptgroup);
1439            }
1440            m_tree.insertHTMLElement(token);
1441            return;
1442        }
1443        if (token.name() == selectTag) {
1444            parseError(token);
1445            AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1446            processEndTag(endSelect);
1447            return;
1448        }
1449        if (token.name() == inputTag
1450            || token.name() == keygenTag
1451            || token.name() == textareaTag) {
1452            parseError(token);
1453            if (!m_tree.openElements()->inSelectScope(selectTag)) {
1454                ASSERT(isParsingFragment());
1455                return;
1456            }
1457            AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1458            processEndTag(endSelect);
1459            reprocessStartTag(token);
1460            return;
1461        }
1462        if (token.name() == scriptTag) {
1463            bool didProcess = processStartTagForInHead(token);
1464            ASSERT_UNUSED(didProcess, didProcess);
1465            return;
1466        }
1467        break;
1468    case InTableTextMode:
1469        defaultForInTableText();
1470        processStartTag(token);
1471        break;
1472    case InForeignContentMode: {
1473        if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentNode())) {
1474            processForeignContentUsingInBodyModeAndResetMode(token);
1475            return;
1476        }
1477        if (token.name() == bTag
1478            || token.name() == bigTag
1479            || token.name() == blockquoteTag
1480            || token.name() == bodyTag
1481            || token.name() == brTag
1482            || token.name() == centerTag
1483            || token.name() == codeTag
1484            || token.name() == ddTag
1485            || token.name() == divTag
1486            || token.name() == dlTag
1487            || token.name() == dtTag
1488            || token.name() == emTag
1489            || token.name() == embedTag
1490            || isNumberedHeaderTag(token.name())
1491            || token.name() == headTag
1492            || token.name() == hrTag
1493            || token.name() == iTag
1494            || token.name() == imgTag
1495            || token.name() == liTag
1496            || token.name() == listingTag
1497            || token.name() == menuTag
1498            || token.name() == metaTag
1499            || token.name() == nobrTag
1500            || token.name() == olTag
1501            || token.name() == pTag
1502            || token.name() == preTag
1503            || token.name() == rubyTag
1504            || token.name() == sTag
1505            || token.name() == smallTag
1506            || token.name() == spanTag
1507            || token.name() == strongTag
1508            || token.name() == strikeTag
1509            || token.name() == subTag
1510            || token.name() == supTag
1511            || token.name() == tableTag
1512            || token.name() == ttTag
1513            || token.name() == uTag
1514            || token.name() == ulTag
1515            || token.name() == varTag
1516            || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1517            parseError(token);
1518            m_tree.openElements()->popUntilForeignContentScopeMarker();
1519            resetInsertionModeAppropriately();
1520            reprocessStartTag(token);
1521            return;
1522        }
1523        const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1524        if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1525            adjustMathMLAttributes(token);
1526        if (currentNamespace == SVGNames::svgNamespaceURI) {
1527            adjustSVGTagNameCase(token);
1528            adjustSVGAttributes(token);
1529        }
1530        adjustForeignAttributes(token);
1531        m_tree.insertForeignElement(token, currentNamespace);
1532        break;
1533    }
1534    case TextMode:
1535        ASSERT_NOT_REACHED();
1536        break;
1537    }
1538}
1539
1540bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1541{
1542    ASSERT(token.type() == HTMLToken::EndTag);
1543    ASSERT(token.name() == bodyTag);
1544    if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1545        parseError(token);
1546        return false;
1547    }
1548    notImplemented(); // Emit a more specific parse error based on stack contents.
1549    setInsertionMode(AfterBodyMode);
1550    return true;
1551}
1552
1553void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1554{
1555    ASSERT(token.type() == HTMLToken::EndTag);
1556    HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1557    while (1) {
1558        ContainerNode* node = record->node();
1559        if (node->hasLocalName(token.name())) {
1560            m_tree.generateImpliedEndTags();
1561            // FIXME: The ElementRecord pointed to by record might be deleted by
1562            // the preceding call. Perhaps we should hold a RefPtr so that it
1563            // stays alive for the duration of record's scope.
1564            record = 0;
1565            if (!m_tree.currentNode()->hasLocalName(token.name())) {
1566                parseError(token);
1567                // FIXME: This is either a bug in the spec, or a bug in our
1568                // implementation.  Filed a bug with HTML5:
1569                // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1570                // We might have already popped the node for the token in
1571                // generateImpliedEndTags, just abort.
1572                if (!m_tree.openElements()->contains(toElement(node)))
1573                    return;
1574            }
1575            m_tree.openElements()->popUntilPopped(toElement(node));
1576            return;
1577        }
1578        if (isSpecialNode(node)) {
1579            parseError(token);
1580            return;
1581        }
1582        record = record->next();
1583    }
1584}
1585
1586// FIXME: This probably belongs on HTMLElementStack.
1587HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1588{
1589    HTMLElementStack::ElementRecord* furthestBlock = 0;
1590    HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1591    for (; record; record = record->next()) {
1592        if (record->element() == formattingElement)
1593            return furthestBlock;
1594        if (isSpecialNode(record->element()))
1595            furthestBlock = record;
1596    }
1597    ASSERT_NOT_REACHED();
1598    return 0;
1599}
1600
1601// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
1602void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1603{
1604    // The adoption agency algorithm is N^2.  We limit the number of iterations
1605    // to stop from hanging the whole browser.  This limit is copied from the
1606    // legacy tree builder and might need to be tweaked in the future.
1607    static const int adoptionAgencyIterationLimit = 10;
1608
1609    for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1610        // 1.
1611        Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1612        if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1613            parseError(token);
1614            notImplemented(); // Check the stack of open elements for a more specific parse error.
1615            return;
1616        }
1617        HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1618        if (!formattingElementRecord) {
1619            parseError(token);
1620            m_tree.activeFormattingElements()->remove(formattingElement);
1621            return;
1622        }
1623        if (formattingElement != m_tree.currentElement())
1624            parseError(token);
1625        // 2.
1626        HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1627        // 3.
1628        if (!furthestBlock) {
1629            m_tree.openElements()->popUntilPopped(formattingElement);
1630            m_tree.activeFormattingElements()->remove(formattingElement);
1631            return;
1632        }
1633        // 4.
1634        ASSERT(furthestBlock->isAbove(formattingElementRecord));
1635        ContainerNode* commonAncestor = formattingElementRecord->next()->node();
1636        // 5.
1637        HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1638        // 6.
1639        HTMLElementStack::ElementRecord* node = furthestBlock;
1640        HTMLElementStack::ElementRecord* nextNode = node->next();
1641        HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1642        for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1643            // 6.1
1644            node = nextNode;
1645            ASSERT(node);
1646            nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1647            // 6.2
1648            if (!m_tree.activeFormattingElements()->contains(node->element())) {
1649                m_tree.openElements()->remove(node->element());
1650                node = 0;
1651                continue;
1652            }
1653            // 6.3
1654            if (node == formattingElementRecord)
1655                break;
1656            // 6.5
1657            RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1658            HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1659            nodeEntry->replaceElement(newElement.get());
1660            node->replaceElement(newElement.release());
1661            // 6.4 -- Intentionally out of order to handle the case where node
1662            // was replaced in 6.5.
1663            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1664            if (lastNode == furthestBlock)
1665                bookmark.moveToAfter(nodeEntry);
1666            // 6.6
1667            if (Element* parent = lastNode->element()->parentElement())
1668                parent->parserRemoveChild(lastNode->element());
1669            node->element()->parserAddChild(lastNode->element());
1670            if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1671                lastNode->element()->lazyAttach();
1672            // 6.7
1673            lastNode = node;
1674        }
1675        // 7
1676        const AtomicString& commonAncestorTag = commonAncestor->localName();
1677        if (Element* parent = lastNode->element()->parentElement())
1678            parent->parserRemoveChild(lastNode->element());
1679        // FIXME: If this moves to HTMLConstructionSite, this check should use
1680        // causesFosterParenting(tagName) instead.
1681        if (commonAncestorTag == tableTag
1682            || commonAncestorTag == trTag
1683            || isTableBodyContextTag(commonAncestorTag))
1684            m_tree.fosterParent(lastNode->element());
1685        else {
1686            commonAncestor->parserAddChild(lastNode->element());
1687            ASSERT(lastNode->node()->isElementNode());
1688            ASSERT(lastNode->element()->parentNode());
1689            if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1690                lastNode->element()->lazyAttach();
1691        }
1692        // 8
1693        RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1694        // 9
1695        newElement->takeAllChildrenFrom(furthestBlock->element());
1696        // 10
1697        Element* furthestBlockElement = furthestBlock->element();
1698        // FIXME: All this creation / parserAddChild / attach business should
1699        //        be in HTMLConstructionSite.  My guess is that steps 8--12
1700        //        should all be in some HTMLConstructionSite function.
1701        furthestBlockElement->parserAddChild(newElement);
1702        if (furthestBlockElement->attached() && !newElement->attached()) {
1703            // Notice that newElement might already be attached if, for example, one of the reparented
1704            // children is a style element, which attaches itself automatically.
1705            newElement->attach();
1706        }
1707        // 11
1708        m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1709        // 12
1710        m_tree.openElements()->remove(formattingElement);
1711        m_tree.openElements()->insertAbove(newElement, furthestBlock);
1712    }
1713}
1714
1715void HTMLTreeBuilder::resetInsertionModeAppropriately()
1716{
1717    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1718    bool last = false;
1719    HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1720    while (1) {
1721        ContainerNode* node = nodeRecord->node();
1722        if (node == m_tree.openElements()->rootNode()) {
1723            ASSERT(isParsingFragment());
1724            last = true;
1725            node = m_fragmentContext.contextElement();
1726        }
1727        if (node->hasTagName(selectTag)) {
1728            ASSERT(isParsingFragment());
1729            return setInsertionMode(InSelectMode);
1730        }
1731        if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1732            return setInsertionMode(InCellMode);
1733        if (node->hasTagName(trTag))
1734            return setInsertionMode(InRowMode);
1735        if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1736            return setInsertionMode(InTableBodyMode);
1737        if (node->hasTagName(captionTag))
1738            return setInsertionMode(InCaptionMode);
1739        if (node->hasTagName(colgroupTag)) {
1740            ASSERT(isParsingFragment());
1741            return setInsertionMode(InColumnGroupMode);
1742        }
1743        if (node->hasTagName(tableTag))
1744            return setInsertionMode(InTableMode);
1745        if (node->hasTagName(headTag)) {
1746            ASSERT(isParsingFragment());
1747            return setInsertionMode(InBodyMode);
1748        }
1749        if (node->hasTagName(bodyTag))
1750            return setInsertionMode(InBodyMode);
1751        if (node->hasTagName(framesetTag)) {
1752            ASSERT(isParsingFragment());
1753            return setInsertionMode(InFramesetMode);
1754        }
1755        if (node->hasTagName(htmlTag)) {
1756            ASSERT(isParsingFragment());
1757            return setInsertionMode(BeforeHeadMode);
1758        }
1759        if (node->namespaceURI() == SVGNames::svgNamespaceURI
1760            || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1761            return setInsertionMode(InForeignContentMode);
1762        if (last) {
1763            ASSERT(isParsingFragment());
1764            return setInsertionMode(InBodyMode);
1765        }
1766        nodeRecord = nodeRecord->next();
1767    }
1768}
1769
1770void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1771{
1772    ASSERT(token.type() == HTMLToken::EndTag);
1773    if (isTableBodyContextTag(token.name())) {
1774        if (!m_tree.openElements()->inTableScope(token.name())) {
1775            parseError(token);
1776            return;
1777        }
1778        m_tree.openElements()->popUntilTableBodyScopeMarker();
1779        m_tree.openElements()->pop();
1780        setInsertionMode(InTableMode);
1781        return;
1782    }
1783    if (token.name() == tableTag) {
1784        // FIXME: This is slow.
1785        if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1786            ASSERT(isParsingFragment());
1787            parseError(token);
1788            return;
1789        }
1790        m_tree.openElements()->popUntilTableBodyScopeMarker();
1791        ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1792        processFakeEndTag(m_tree.currentElement()->tagQName());
1793        reprocessEndTag(token);
1794        return;
1795    }
1796    if (token.name() == bodyTag
1797        || isCaptionColOrColgroupTag(token.name())
1798        || token.name() == htmlTag
1799        || isTableCellContextTag(token.name())
1800        || token.name() == trTag) {
1801        parseError(token);
1802        return;
1803    }
1804    processEndTagForInTable(token);
1805}
1806
1807void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1808{
1809    ASSERT(token.type() == HTMLToken::EndTag);
1810    if (token.name() == trTag) {
1811        processTrEndTagForInRow();
1812        return;
1813    }
1814    if (token.name() == tableTag) {
1815        if (!processTrEndTagForInRow()) {
1816            ASSERT(isParsingFragment());
1817            return;
1818        }
1819        ASSERT(insertionMode() == InTableBodyMode);
1820        reprocessEndTag(token);
1821        return;
1822    }
1823    if (isTableBodyContextTag(token.name())) {
1824        if (!m_tree.openElements()->inTableScope(token.name())) {
1825            parseError(token);
1826            return;
1827        }
1828        processFakeEndTag(trTag);
1829        ASSERT(insertionMode() == InTableBodyMode);
1830        reprocessEndTag(token);
1831        return;
1832    }
1833    if (token.name() == bodyTag
1834        || isCaptionColOrColgroupTag(token.name())
1835        || token.name() == htmlTag
1836        || isTableCellContextTag(token.name())) {
1837        parseError(token);
1838        return;
1839    }
1840    processEndTagForInTable(token);
1841}
1842
1843void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1844{
1845    ASSERT(token.type() == HTMLToken::EndTag);
1846    if (isTableCellContextTag(token.name())) {
1847        if (!m_tree.openElements()->inTableScope(token.name())) {
1848            parseError(token);
1849            return;
1850        }
1851        m_tree.generateImpliedEndTags();
1852        if (!m_tree.currentNode()->hasLocalName(token.name()))
1853            parseError(token);
1854        m_tree.openElements()->popUntilPopped(token.name());
1855        m_tree.activeFormattingElements()->clearToLastMarker();
1856        setInsertionMode(InRowMode);
1857        return;
1858    }
1859    if (token.name() == bodyTag
1860        || isCaptionColOrColgroupTag(token.name())
1861        || token.name() == htmlTag) {
1862        parseError(token);
1863        return;
1864    }
1865    if (token.name() == tableTag
1866        || token.name() == trTag
1867        || isTableBodyContextTag(token.name())) {
1868        if (!m_tree.openElements()->inTableScope(token.name())) {
1869            ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1870            parseError(token);
1871            return;
1872        }
1873        closeTheCell();
1874        reprocessEndTag(token);
1875        return;
1876    }
1877    processEndTagForInBody(token);
1878}
1879
1880void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1881{
1882    ASSERT(token.type() == HTMLToken::EndTag);
1883    if (token.name() == bodyTag) {
1884        processBodyEndTagForInBody(token);
1885        return;
1886    }
1887    if (token.name() == htmlTag) {
1888        AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1889        if (processBodyEndTagForInBody(endBody))
1890            reprocessEndTag(token);
1891        return;
1892    }
1893    if (token.name() == addressTag
1894        || token.name() == articleTag
1895        || token.name() == asideTag
1896        || token.name() == blockquoteTag
1897        || token.name() == buttonTag
1898        || token.name() == centerTag
1899        || token.name() == detailsTag
1900        || token.name() == dirTag
1901        || token.name() == divTag
1902        || token.name() == dlTag
1903        || token.name() == fieldsetTag
1904        || token.name() == figcaptionTag
1905        || token.name() == figureTag
1906        || token.name() == footerTag
1907        || token.name() == headerTag
1908        || token.name() == hgroupTag
1909        || token.name() == listingTag
1910        || token.name() == menuTag
1911        || token.name() == navTag
1912        || token.name() == olTag
1913        || token.name() == preTag
1914        || token.name() == sectionTag
1915        || token.name() == summaryTag
1916        || token.name() == ulTag) {
1917        if (!m_tree.openElements()->inScope(token.name())) {
1918            parseError(token);
1919            return;
1920        }
1921        m_tree.generateImpliedEndTags();
1922        if (!m_tree.currentNode()->hasLocalName(token.name()))
1923            parseError(token);
1924        m_tree.openElements()->popUntilPopped(token.name());
1925        return;
1926    }
1927    if (token.name() == formTag) {
1928        RefPtr<Element> node = m_tree.takeForm();
1929        if (!node || !m_tree.openElements()->inScope(node.get())) {
1930            parseError(token);
1931            return;
1932        }
1933        m_tree.generateImpliedEndTags();
1934        if (m_tree.currentElement() != node.get())
1935            parseError(token);
1936        m_tree.openElements()->remove(node.get());
1937    }
1938    if (token.name() == pTag) {
1939        if (!m_tree.openElements()->inButtonScope(token.name())) {
1940            parseError(token);
1941            processFakeStartTag(pTag);
1942            ASSERT(m_tree.openElements()->inScope(token.name()));
1943            reprocessEndTag(token);
1944            return;
1945        }
1946        m_tree.generateImpliedEndTagsWithExclusion(token.name());
1947        if (!m_tree.currentNode()->hasLocalName(token.name()))
1948            parseError(token);
1949        m_tree.openElements()->popUntilPopped(token.name());
1950        return;
1951    }
1952    if (token.name() == liTag) {
1953        if (!m_tree.openElements()->inListItemScope(token.name())) {
1954            parseError(token);
1955            return;
1956        }
1957        m_tree.generateImpliedEndTagsWithExclusion(token.name());
1958        if (!m_tree.currentNode()->hasLocalName(token.name()))
1959            parseError(token);
1960        m_tree.openElements()->popUntilPopped(token.name());
1961        return;
1962    }
1963    if (token.name() == ddTag
1964        || token.name() == dtTag) {
1965        if (!m_tree.openElements()->inScope(token.name())) {
1966            parseError(token);
1967            return;
1968        }
1969        m_tree.generateImpliedEndTagsWithExclusion(token.name());
1970        if (!m_tree.currentNode()->hasLocalName(token.name()))
1971            parseError(token);
1972        m_tree.openElements()->popUntilPopped(token.name());
1973        return;
1974    }
1975    if (isNumberedHeaderTag(token.name())) {
1976        if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1977            parseError(token);
1978            return;
1979        }
1980        m_tree.generateImpliedEndTags();
1981        if (!m_tree.currentNode()->hasLocalName(token.name()))
1982            parseError(token);
1983        m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1984        return;
1985    }
1986    if (isFormattingTag(token.name())) {
1987        callTheAdoptionAgency(token);
1988        return;
1989    }
1990    if (token.name() == appletTag
1991        || token.name() == marqueeTag
1992        || token.name() == objectTag) {
1993        if (!m_tree.openElements()->inScope(token.name())) {
1994            parseError(token);
1995            return;
1996        }
1997        m_tree.generateImpliedEndTags();
1998        if (!m_tree.currentNode()->hasLocalName(token.name()))
1999            parseError(token);
2000        m_tree.openElements()->popUntilPopped(token.name());
2001        m_tree.activeFormattingElements()->clearToLastMarker();
2002        return;
2003    }
2004    if (token.name() == brTag) {
2005        parseError(token);
2006        processFakeStartTag(brTag);
2007        return;
2008    }
2009    processAnyOtherEndTagForInBody(token);
2010}
2011
2012bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
2013{
2014    if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
2015        ASSERT(isParsingFragment());
2016        // FIXME: parse error
2017        return false;
2018    }
2019    m_tree.generateImpliedEndTags();
2020    // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2021    m_tree.openElements()->popUntilPopped(captionTag.localName());
2022    m_tree.activeFormattingElements()->clearToLastMarker();
2023    setInsertionMode(InTableMode);
2024    return true;
2025}
2026
2027bool HTMLTreeBuilder::processTrEndTagForInRow()
2028{
2029    if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2030        ASSERT(isParsingFragment());
2031        // FIXME: parse error
2032        return false;
2033    }
2034    m_tree.openElements()->popUntilTableRowScopeMarker();
2035    ASSERT(m_tree.currentElement()->hasTagName(trTag));
2036    m_tree.openElements()->pop();
2037    setInsertionMode(InTableBodyMode);
2038    return true;
2039}
2040
2041bool HTMLTreeBuilder::processTableEndTagForInTable()
2042{
2043    if (!m_tree.openElements()->inTableScope(tableTag)) {
2044        ASSERT(isParsingFragment());
2045        // FIXME: parse error.
2046        return false;
2047    }
2048    m_tree.openElements()->popUntilPopped(tableTag.localName());
2049    resetInsertionModeAppropriately();
2050    return true;
2051}
2052
2053void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2054{
2055    ASSERT(token.type() == HTMLToken::EndTag);
2056    if (token.name() == tableTag) {
2057        processTableEndTagForInTable();
2058        return;
2059    }
2060    if (token.name() == bodyTag
2061        || isCaptionColOrColgroupTag(token.name())
2062        || token.name() == htmlTag
2063        || isTableBodyContextTag(token.name())
2064        || isTableCellContextTag(token.name())
2065        || token.name() == trTag) {
2066        parseError(token);
2067        return;
2068    }
2069    // Is this redirection necessary here?
2070    HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2071    processEndTagForInBody(token);
2072}
2073
// Dispatches an end tag token according to the current insertion mode.
//
// Several cases deliberately fall through into the next one: after a
// defaultFor*() call or setInsertionMode() the ASSERT at the top of the
// following case checks that the builder is now in that mode, and the same
// token is then handled there.
void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::EndTag);
    switch (insertionMode()) {
    case InitialMode:
        ASSERT(insertionMode() == InitialMode);
        defaultForInitial();
        // Fall through.
    case BeforeHTMLMode:
        ASSERT(insertionMode() == BeforeHTMLMode);
        // Only </head>, </body>, </html> and </br> are processed further;
        // every other end tag is a parse error and is dropped.
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHTML();
        // Fall through.
    case BeforeHeadMode:
        ASSERT(insertionMode() == BeforeHeadMode);
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHead();
        // Fall through.
    case InHeadMode:
        ASSERT(insertionMode() == InHeadMode);
        if (token.name() == headTag) {
            m_tree.openElements()->popHTMLHeadElement();
            setInsertionMode(AfterHeadMode);
            return;
        }
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHead();
        // Fall through.
    case AfterHeadMode:
        ASSERT(insertionMode() == AfterHeadMode);
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForAfterHead();
        // Fall through
    case InBodyMode:
        ASSERT(insertionMode() == InBodyMode);
        processEndTagForInBody(token);
        break;
    case InTableMode:
        ASSERT(insertionMode() == InTableMode);
        processEndTagForInTable(token);
        break;
    case InCaptionMode:
        ASSERT(insertionMode() == InCaptionMode);
        if (token.name() == captionTag) {
            processCaptionEndTagForInCaption();
            return;
        }
        if (token.name() == tableTag) {
            // </table> inside a caption: close the caption first, then let the
            // resulting mode see the </table> token again.
            parseError(token);
            if (!processCaptionEndTagForInCaption()) {
                ASSERT(isParsingFragment());
                return;
            }
            reprocessEndTag(token);
            return;
        }
        if (token.name() == bodyTag
            || token.name() == colTag
            || token.name() == colgroupTag
            || token.name() == htmlTag
            || isTableBodyContextTag(token.name())
            || isTableCellContextTag(token.name())
            || token.name() == trTag) {
            parseError(token);
            return;
        }
        processEndTagForInBody(token);
        break;
    case InColumnGroupMode:
        ASSERT(insertionMode() == InColumnGroupMode);
        if (token.name() == colgroupTag) {
            processColgroupEndTagForInColumnGroup();
            return;
        }
        if (token.name() == colTag) {
            parseError(token);
            return;
        }
        // Any other end tag: act as if </colgroup> had been seen and, if that
        // succeeded, reprocess the token.
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragment());
            return;
        }
        reprocessEndTag(token);
        break;
    case InRowMode:
        ASSERT(insertionMode() == InRowMode);
        processEndTagForInRow(token);
        break;
    case InCellMode:
        ASSERT(insertionMode() == InCellMode);
        processEndTagForInCell(token);
        break;
    case InTableBodyMode:
        ASSERT(insertionMode() == InTableBodyMode);
        processEndTagForInTableBody(token);
        break;
    case AfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode);
        if (token.name() == htmlTag) {
            if (isParsingFragment()) {
                parseError(token);
                return;
            }
            setInsertionMode(AfterAfterBodyMode);
            return;
        }
        prepareToReprocessToken();
        // Fall through.
    case AfterAfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        // Parse error: switch back to InBodyMode and handle the token there.
        parseError(token);
        setInsertionMode(InBodyMode);
        reprocessEndTag(token);
        break;
    case InHeadNoscriptMode:
        ASSERT(insertionMode() == InHeadNoscriptMode);
        if (token.name() == noscriptTag) {
            ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
            m_tree.openElements()->pop();
            ASSERT(m_tree.currentElement()->hasTagName(headTag));
            setInsertionMode(InHeadMode);
            return;
        }
        // Of the remaining end tags only </br> is processed further.
        if (token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHeadNoscript();
        processToken(token);
        break;
    case TextMode:
        if (token.name() == scriptTag) {
            // Pause ourselves so that parsing stops until the script can be processed by the caller.
            m_isPaused = true;
            ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
            m_scriptToProcess = m_tree.currentElement();
            m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
            m_tree.openElements()->pop();
            // When fragment parsing disallows scripting, the script element's
            // children are removed.
            if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
                m_scriptToProcess->removeAllChildren();
            setInsertionMode(m_originalInsertionMode);

            // This token will not have been created by the tokenizer if a
            // self-closing script tag was encountered and pre-HTML5 parser
            // quirks are enabled. We must set the tokenizer's state to
            // DataState explicitly if the tokenizer didn't have a chance to.
            ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
            m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
            return;
        }
        // Any other end tag: pop the current element and restore the
        // insertion mode saved in m_originalInsertionMode.
        m_tree.openElements()->pop();
        setInsertionMode(m_originalInsertionMode);
        break;
    case InFramesetMode:
        ASSERT(insertionMode() == InFramesetMode);
        if (token.name() == framesetTag) {
            if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
                parseError(token);
                return;
            }
            m_tree.openElements()->pop();
            if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
                setInsertionMode(AfterFramesetMode);
            return;
        }
        // Other end tags are ignored here without reporting a parse error.
        break;
    case AfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode);
        if (token.name() == htmlTag) {
            setInsertionMode(AfterAfterFramesetMode);
            return;
        }
        // Fall through.
    case AfterAfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        parseError(token);
        break;
    case InSelectInTableMode:
        ASSERT(insertionMode() == InSelectInTableMode);
        if (token.name() == captionTag
            || token.name() == tableTag
            || isTableBodyContextTag(token.name())
            || token.name() == trTag
            || isTableCellContextTag(token.name())) {
            parseError(token);
            // If the named element is in table scope, close the select with a
            // synthesized </select> and then reprocess the original token.
            if (m_tree.openElements()->inTableScope(token.name())) {
                AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
                processEndTag(endSelect);
                reprocessEndTag(token);
            }
            return;
        }
        // Fall through.
    case InSelectMode:
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
        if (token.name() == optgroupTag) {
            // An <option> sitting directly above an <optgroup> is closed first.
            if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
                processFakeEndTag(optionTag);
            if (m_tree.currentNode()->hasTagName(optgroupTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == optionTag) {
            if (m_tree.currentNode()->hasTagName(optionTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == selectTag) {
            if (!m_tree.openElements()->inSelectScope(token.name())) {
                ASSERT(isParsingFragment());
                parseError(token);
                return;
            }
            m_tree.openElements()->popUntilPopped(selectTag.localName());
            resetInsertionModeAppropriately();
            return;
        }
        // Other end tags are ignored here without reporting a parse error.
        break;
    case InTableTextMode:
        // Run the default handling for this mode, then dispatch the token
        // again under whatever insertion mode is current afterwards.
        defaultForInTableText();
        processEndTag(token);
        break;
    case InForeignContentMode:
        if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
            notImplemented();
            return;
        }
        if (!isInHTMLNamespace(m_tree.currentNode())) {
            // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
            HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
            if (!nodeRecord->node()->hasLocalName(token.name()))
                parseError(token);
            // Walk down the stack looking for an element whose local name
            // matches the token; stop at the first HTML-namespace element.
            while (1) {
                if (nodeRecord->node()->hasLocalName(token.name())) {
                    m_tree.openElements()->popUntilPopped(nodeRecord->element());
                    resetForeignInsertionMode();
                    return;
                }
                nodeRecord = nodeRecord->next();

                if (isInHTMLNamespace(nodeRecord->node()))
                    break;
            }
        }
        // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
        processForeignContentUsingInBodyModeAndResetMode(token);
        break;
    }
}
2341
2342void HTMLTreeBuilder::prepareToReprocessToken()
2343{
2344    if (m_hasPendingForeignInsertionModeSteps) {
2345        resetForeignInsertionMode();
2346        m_hasPendingForeignInsertionModeSteps = false;
2347    }
2348}
2349
// Dispatches a start tag token again through processStartTag(), first
// running prepareToReprocessToken() so that any pending foreign-content
// insertion mode steps are applied before the token is handled.
void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
{
    prepareToReprocessToken();
    processStartTag(token);
}
2355
// Dispatches an end tag token again through processEndTag(), first running
// prepareToReprocessToken() so that any pending foreign-content insertion
// mode steps are applied before the token is handled.
void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
{
    prepareToReprocessToken();
    processEndTag(token);
}
2361
2362class HTMLTreeBuilder::FakeInsertionMode {
2363    WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2364public:
2365    FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2366        : m_treeBuilder(treeBuilder)
2367        , m_originalMode(treeBuilder->insertionMode())
2368    {
2369        m_treeBuilder->setFakeInsertionMode(mode);
2370    }
2371
2372    ~FakeInsertionMode()
2373    {
2374        if (m_treeBuilder->isFakeInsertionMode())
2375            m_treeBuilder->setInsertionMode(m_originalMode);
2376    }
2377
2378private:
2379    HTMLTreeBuilder* m_treeBuilder;
2380    InsertionMode m_originalMode;
2381};
2382
2383void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2384{
2385    m_hasPendingForeignInsertionModeSteps = true;
2386    {
2387        FakeInsertionMode fakeMode(this, InBodyMode);
2388        processToken(token);
2389    }
2390    if (m_hasPendingForeignInsertionModeSteps)
2391        resetForeignInsertionMode();
2392}
2393
2394void HTMLTreeBuilder::resetForeignInsertionMode()
2395{
2396    if (insertionMode() == InForeignContentMode)
2397        resetInsertionModeAppropriately();
2398}
2399
2400void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2401{
2402    ASSERT(token.type() == HTMLToken::Comment);
2403    if (m_insertionMode == InitialMode
2404        || m_insertionMode == BeforeHTMLMode
2405        || m_insertionMode == AfterAfterBodyMode
2406        || m_insertionMode == AfterAfterFramesetMode) {
2407        m_tree.insertCommentOnDocument(token);
2408        return;
2409    }
2410    if (m_insertionMode == AfterBodyMode) {
2411        m_tree.insertCommentOnHTMLHtmlElement(token);
2412        return;
2413    }
2414    if (m_insertionMode == InTableTextMode) {
2415        defaultForInTableText();
2416        processComment(token);
2417        return;
2418    }
2419    m_tree.insertComment(token);
2420}
2421
// Handles a character token by wrapping it in an ExternalCharacterTokenBuffer
// and handing that buffer to processCharacterBuffer().
void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::Character);
    ExternalCharacterTokenBuffer buffer(token);
    processCharacterBuffer(buffer);
}
2428
2429void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2430{
2431ReprocessBuffer:
2432    switch (insertionMode()) {
2433    case InitialMode: {
2434        ASSERT(insertionMode() == InitialMode);
2435        buffer.skipLeadingWhitespace();
2436        if (buffer.isEmpty())
2437            return;
2438        defaultForInitial();
2439        // Fall through.
2440    }
2441    case BeforeHTMLMode: {
2442        ASSERT(insertionMode() == BeforeHTMLMode);
2443        buffer.skipLeadingWhitespace();
2444        if (buffer.isEmpty())
2445            return;
2446        defaultForBeforeHTML();
2447        // Fall through.
2448    }
2449    case BeforeHeadMode: {
2450        ASSERT(insertionMode() == BeforeHeadMode);
2451        buffer.skipLeadingWhitespace();
2452        if (buffer.isEmpty())
2453            return;
2454        defaultForBeforeHead();
2455        // Fall through.
2456    }
2457    case InHeadMode: {
2458        ASSERT(insertionMode() == InHeadMode);
2459        String leadingWhitespace = buffer.takeLeadingWhitespace();
2460        if (!leadingWhitespace.isEmpty())
2461            m_tree.insertTextNode(leadingWhitespace);
2462        if (buffer.isEmpty())
2463            return;
2464        defaultForInHead();
2465        // Fall through.
2466    }
2467    case AfterHeadMode: {
2468        ASSERT(insertionMode() == AfterHeadMode);
2469        String leadingWhitespace = buffer.takeLeadingWhitespace();
2470        if (!leadingWhitespace.isEmpty())
2471            m_tree.insertTextNode(leadingWhitespace);
2472        if (buffer.isEmpty())
2473            return;
2474        defaultForAfterHead();
2475        // Fall through.
2476    }
2477    case InBodyMode:
2478    case InCaptionMode:
2479    case InCellMode: {
2480        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2481        m_tree.reconstructTheActiveFormattingElements();
2482        String characters = buffer.takeRemaining();
2483        m_tree.insertTextNode(characters);
2484        if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2485            m_framesetOk = false;
2486        break;
2487    }
2488    case InTableMode:
2489    case InTableBodyMode:
2490    case InRowMode: {
2491        ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2492        ASSERT(m_pendingTableCharacters.isEmpty());
2493        m_originalInsertionMode = m_insertionMode;
2494        setInsertionMode(InTableTextMode);
2495        prepareToReprocessToken();
2496        // Fall through.
2497    }
2498    case InTableTextMode: {
2499        buffer.giveRemainingTo(m_pendingTableCharacters);
2500        break;
2501    }
2502    case InColumnGroupMode: {
2503        ASSERT(insertionMode() == InColumnGroupMode);
2504        String leadingWhitespace = buffer.takeLeadingWhitespace();
2505        if (!leadingWhitespace.isEmpty())
2506            m_tree.insertTextNode(leadingWhitespace);
2507        if (buffer.isEmpty())
2508            return;
2509        if (!processColgroupEndTagForInColumnGroup()) {
2510            ASSERT(isParsingFragment());
2511            // The spec tells us to drop these characters on the floor.
2512            buffer.takeLeadingNonWhitespace();
2513            if (buffer.isEmpty())
2514                return;
2515        }
2516        prepareToReprocessToken();
2517        goto ReprocessBuffer;
2518    }
2519    case AfterBodyMode:
2520    case AfterAfterBodyMode: {
2521        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2522        // FIXME: parse error
2523        setInsertionMode(InBodyMode);
2524        prepareToReprocessToken();
2525        goto ReprocessBuffer;
2526        break;
2527    }
2528    case TextMode: {
2529        ASSERT(insertionMode() == TextMode);
2530        m_tree.insertTextNode(buffer.takeRemaining());
2531        break;
2532    }
2533    case InHeadNoscriptMode: {
2534        ASSERT(insertionMode() == InHeadNoscriptMode);
2535        String leadingWhitespace = buffer.takeLeadingWhitespace();
2536        if (!leadingWhitespace.isEmpty())
2537            m_tree.insertTextNode(leadingWhitespace);
2538        if (buffer.isEmpty())
2539            return;
2540        defaultForInHeadNoscript();
2541        goto ReprocessBuffer;
2542        break;
2543    }
2544    case InFramesetMode:
2545    case AfterFramesetMode: {
2546        ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2547        String leadingWhitespace = buffer.takeRemainingWhitespace();
2548        if (!leadingWhitespace.isEmpty())
2549            m_tree.insertTextNode(leadingWhitespace);
2550        // FIXME: We should generate a parse error if we skipped over any
2551        // non-whitespace characters.
2552        break;
2553    }
2554    case InSelectInTableMode:
2555    case InSelectMode: {
2556        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2557        m_tree.insertTextNode(buffer.takeRemaining());
2558        break;
2559    }
2560    case InForeignContentMode: {
2561        ASSERT(insertionMode() == InForeignContentMode);
2562        String characters = buffer.takeRemaining();
2563        m_tree.insertTextNode(characters);
2564        if (m_framesetOk && !isAllWhitespace(characters))
2565            m_framesetOk = false;
2566        break;
2567    }
2568    case AfterAfterFramesetMode: {
2569        String leadingWhitespace = buffer.takeRemainingWhitespace();
2570        if (!leadingWhitespace.isEmpty()) {
2571            m_tree.reconstructTheActiveFormattingElements();
2572            m_tree.insertTextNode(leadingWhitespace);
2573        }
2574        // FIXME: We should generate a parse error if we skipped over any
2575        // non-whitespace characters.
2576        break;
2577    }
2578    }
2579}
2580
2581void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2582{
2583    ASSERT(token.type() == HTMLToken::EndOfFile);
2584    switch (insertionMode()) {
2585    case InitialMode:
2586        ASSERT(insertionMode() == InitialMode);
2587        defaultForInitial();
2588        // Fall through.
2589    case BeforeHTMLMode:
2590        ASSERT(insertionMode() == BeforeHTMLMode);
2591        defaultForBeforeHTML();
2592        // Fall through.
2593    case BeforeHeadMode:
2594        ASSERT(insertionMode() == BeforeHeadMode);
2595        defaultForBeforeHead();
2596        // Fall through.
2597    case InHeadMode:
2598        ASSERT(insertionMode() == InHeadMode);
2599        defaultForInHead();
2600        // Fall through.
2601    case AfterHeadMode:
2602        ASSERT(insertionMode() == AfterHeadMode);
2603        defaultForAfterHead();
2604        // Fall through
2605    case InBodyMode:
2606    case InCellMode:
2607    case InCaptionMode:
2608    case InRowMode:
2609        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2610        notImplemented(); // Emit parse error based on what elements are still open.
2611        break;
2612    case AfterBodyMode:
2613    case AfterAfterBodyMode:
2614        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2615        break;
2616    case InHeadNoscriptMode:
2617        ASSERT(insertionMode() == InHeadNoscriptMode);
2618        defaultForInHeadNoscript();
2619        processEndOfFile(token);
2620        return;
2621    case AfterFramesetMode:
2622    case AfterAfterFramesetMode:
2623        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2624        break;
2625    case InFramesetMode:
2626    case InTableMode:
2627    case InTableBodyMode:
2628    case InSelectInTableMode:
2629    case InSelectMode:
2630        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2631        if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2632            parseError(token);
2633        break;
2634    case InColumnGroupMode:
2635        if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2636            ASSERT(isParsingFragment());
2637            return; // FIXME: Should we break here instead of returning?
2638        }
2639        if (!processColgroupEndTagForInColumnGroup()) {
2640            ASSERT(isParsingFragment());
2641            return; // FIXME: Should we break here instead of returning?
2642        }
2643        prepareToReprocessToken();
2644        processEndOfFile(token);
2645        return;
2646    case InForeignContentMode:
2647        setInsertionMode(InBodyMode);
2648        processEndOfFile(token);
2649        return;
2650    case InTableTextMode:
2651        defaultForInTableText();
2652        processEndOfFile(token);
2653        return;
2654    case TextMode:
2655        parseError(token);
2656        if (m_tree.currentNode()->hasTagName(scriptTag))
2657            notImplemented(); // mark the script element as "already started".
2658        m_tree.openElements()->pop();
2659        setInsertionMode(m_originalInsertionMode);
2660        prepareToReprocessToken();
2661        processEndOfFile(token);
2662        return;
2663    }
2664    ASSERT(m_tree.currentNode());
2665    m_tree.openElements()->popAll();
2666}
2667
2668void HTMLTreeBuilder::defaultForInitial()
2669{
2670    notImplemented();
2671    if (!m_fragmentContext.fragment())
2672        m_document->setCompatibilityMode(Document::QuirksMode);
2673    // FIXME: parse error
2674    setInsertionMode(BeforeHTMLMode);
2675    prepareToReprocessToken();
2676}
2677
2678void HTMLTreeBuilder::defaultForBeforeHTML()
2679{
2680    AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2681    m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2682    setInsertionMode(BeforeHeadMode);
2683    prepareToReprocessToken();
2684}
2685
2686void HTMLTreeBuilder::defaultForBeforeHead()
2687{
2688    AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2689    processStartTag(startHead);
2690    prepareToReprocessToken();
2691}
2692
2693void HTMLTreeBuilder::defaultForInHead()
2694{
2695    AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2696    processEndTag(endHead);
2697    prepareToReprocessToken();
2698}
2699
2700void HTMLTreeBuilder::defaultForInHeadNoscript()
2701{
2702    AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2703    processEndTag(endNoscript);
2704    prepareToReprocessToken();
2705}
2706
2707void HTMLTreeBuilder::defaultForAfterHead()
2708{
2709    AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2710    processStartTag(startBody);
2711    m_framesetOk = true;
2712    prepareToReprocessToken();
2713}
2714
2715void HTMLTreeBuilder::defaultForInTableText()
2716{
2717    String characters = String::adopt(m_pendingTableCharacters);
2718    if (!isAllWhitespace(characters)) {
2719        // FIXME: parse error
2720        HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2721        m_tree.reconstructTheActiveFormattingElements();
2722        m_tree.insertTextNode(characters);
2723        m_framesetOk = false;
2724        setInsertionMode(m_originalInsertionMode);
2725        prepareToReprocessToken();
2726        return;
2727    }
2728    m_tree.insertTextNode(characters);
2729    setInsertionMode(m_originalInsertionMode);
2730    prepareToReprocessToken();
2731}
2732
2733bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2734{
2735    ASSERT(token.type() == HTMLToken::StartTag);
2736    if (token.name() == htmlTag) {
2737        m_tree.insertHTMLHtmlStartTagInBody(token);
2738        return true;
2739    }
2740    if (token.name() == baseTag
2741        || token.name() == basefontTag
2742        || token.name() == bgsoundTag
2743        || token.name() == commandTag
2744        || token.name() == linkTag
2745        || token.name() == metaTag) {
2746        m_tree.insertSelfClosingHTMLElement(token);
2747        // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2748        return true;
2749    }
2750    if (token.name() == titleTag) {
2751        processGenericRCDATAStartTag(token);
2752        return true;
2753    }
2754    if (token.name() == noscriptTag) {
2755        if (scriptEnabled(m_document->frame())) {
2756            processGenericRawTextStartTag(token);
2757            return true;
2758        }
2759        m_tree.insertHTMLElement(token);
2760        setInsertionMode(InHeadNoscriptMode);
2761        return true;
2762    }
2763    if (token.name() == noframesTag || token.name() == styleTag) {
2764        processGenericRawTextStartTag(token);
2765        return true;
2766    }
2767    if (token.name() == scriptTag) {
2768        processScriptStartTag(token);
2769        if (m_usePreHTML5ParserQuirks && token.selfClosing())
2770            processFakeEndTag(scriptTag);
2771        return true;
2772    }
2773    if (token.name() == headTag) {
2774        parseError(token);
2775        return true;
2776    }
2777    return false;
2778}
2779
2780void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2781{
2782    ASSERT(token.type() == HTMLToken::StartTag);
2783    m_tree.insertHTMLElement(token);
2784    m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2785    m_originalInsertionMode = m_insertionMode;
2786    setInsertionMode(TextMode);
2787}
2788
2789void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2790{
2791    ASSERT(token.type() == HTMLToken::StartTag);
2792    m_tree.insertHTMLElement(token);
2793    m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2794    m_originalInsertionMode = m_insertionMode;
2795    setInsertionMode(TextMode);
2796}
2797
2798void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2799{
2800    ASSERT(token.type() == HTMLToken::StartTag);
2801    m_tree.insertScriptElement(token);
2802    m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2803    m_originalInsertionMode = m_insertionMode;
2804
2805    TextPosition0 position = m_parser->textPosition();
2806
2807    ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
2808
2809    m_lastScriptElementStartPosition = position;
2810
2811    setInsertionMode(TextMode);
2812}
2813
2814void HTMLTreeBuilder::finished()
2815{
2816    if (isParsingFragment())
2817        return;
2818
2819    ASSERT(m_document);
2820    // Warning, this may detach the parser. Do not do anything else after this.
2821    m_document->finishedParsing();
2822}
2823
// Hook called where the tree builder detects a parse error. The body is
// currently empty, so the token is ignored and nothing is reported.
void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
{
}
2827
2828bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2829{
2830    if (!frame)
2831        return false;
2832    return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2833}
2834
2835bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2836{
2837    if (!frame)
2838        return false;
2839    return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2840}
2841
2842}
2843