/*
 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
26926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#include "config.h"
2753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLTreeBuilderSimulator.h"
28926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
295d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/HTMLNames.h"
305d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/MathMLNames.h"
315d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/SVGNames.h"
3253e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLParserIdioms.h"
3353e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLTokenizer.h"
3453e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLTreeBuilder.h"
35926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
36c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink {
37926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
38926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)using namespace HTMLNames;
39926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
40926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)static bool tokenExitsForeignContent(const CompactHTMLToken& token)
41926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
42926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeHTMLNamesMatch.
4309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    const String& tagName = token.data();
4409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    return threadSafeMatch(tagName, bTag)
4509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, bigTag)
4609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, blockquoteTag)
4709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, bodyTag)
4809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, brTag)
4909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, centerTag)
5009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, codeTag)
5109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, ddTag)
5209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, divTag)
5309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, dlTag)
5409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, dtTag)
5509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, emTag)
5609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, embedTag)
5709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, h1Tag)
5809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, h2Tag)
5909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, h3Tag)
6009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, h4Tag)
6109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, h5Tag)
6209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, h6Tag)
6309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, headTag)
6409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, hrTag)
6509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, iTag)
6609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, imgTag)
6709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, liTag)
6809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, listingTag)
6909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, menuTag)
7009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, metaTag)
7109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, nobrTag)
7209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, olTag)
7309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, pTag)
7409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, preTag)
7509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, rubyTag)
7609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, sTag)
7709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, smallTag)
7809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, spanTag)
7909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, strongTag)
8009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, strikeTag)
8109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, subTag)
8209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, supTag)
8309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, tableTag)
8409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, ttTag)
8509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, uTag)
8609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, ulTag)
8709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || threadSafeMatch(tagName, varTag)
8809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        || (threadSafeMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)));
89926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
90926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
91926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)static bool tokenExitsSVG(const CompactHTMLToken& token)
92926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
93926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // FIXME: It's very fragile that we special case foreignObject here to be case-insensitive.
9409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    return equalIgnoringCaseNonNull(token.data().impl(), SVGNames::foreignObjectTag.localName().impl());
95926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
96926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
97926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)static bool tokenExitsMath(const CompactHTMLToken& token)
98926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
99926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint and changed to use threadSafeMatch.
10009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    const String& tagName = token.data();
101926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return threadSafeMatch(tagName, MathMLNames::miTag)
102926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        || threadSafeMatch(tagName, MathMLNames::moTag)
103926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        || threadSafeMatch(tagName, MathMLNames::mnTag)
104926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        || threadSafeMatch(tagName, MathMLNames::msTag)
105926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        || threadSafeMatch(tagName, MathMLNames::mtextTag);
106926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
107926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
108926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(const HTMLParserOptions& options)
109926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    : m_options(options)
110926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
111926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    m_namespaceStack.append(HTML);
112926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
113926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
114926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)HTMLTreeBuilderSimulator::State HTMLTreeBuilderSimulator::stateFor(HTMLTreeBuilder* treeBuilder)
115926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
116926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(isMainThread());
117926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    State namespaceStack;
118926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    for (HTMLElementStack::ElementRecord* record = treeBuilder->openElements()->topRecord(); record; record = record->next()) {
119926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        Namespace currentNamespace = HTML;
120926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (record->namespaceURI() == SVGNames::svgNamespaceURI)
121926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            currentNamespace = SVG;
122926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        else if (record->namespaceURI() == MathMLNames::mathmlNamespaceURI)
123926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            currentNamespace = MathML;
124926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
125926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (namespaceStack.isEmpty() || namespaceStack.last() != currentNamespace)
126926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            namespaceStack.append(currentNamespace);
127926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
128926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    namespaceStack.reverse();
129926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return namespaceStack;
130926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
131926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
132926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool HTMLTreeBuilderSimulator::simulate(const CompactHTMLToken& token, HTMLTokenizer* tokenizer)
133926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
134926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (token.type() == HTMLToken::StartTag) {
13509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        const String& tagName = token.data();
136926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (threadSafeMatch(tagName, SVGNames::svgTag))
137926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_namespaceStack.append(SVG);
138926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (threadSafeMatch(tagName, MathMLNames::mathTag))
139926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_namespaceStack.append(MathML);
140926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (inForeignContent() && tokenExitsForeignContent(token))
141926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_namespaceStack.removeLast();
142926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if ((m_namespaceStack.last() == SVG && tokenExitsSVG(token))
143926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            || (m_namespaceStack.last() == MathML && tokenExitsMath(token)))
144926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_namespaceStack.append(HTML);
145926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (!inForeignContent()) {
146926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches.
14709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            if (threadSafeMatch(tagName, textareaTag) || threadSafeMatch(tagName, titleTag))
148926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                tokenizer->setState(HTMLTokenizer::RCDATAState);
14909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            else if (threadSafeMatch(tagName, plaintextTag))
150926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
15109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            else if (threadSafeMatch(tagName, scriptTag))
152926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                tokenizer->setState(HTMLTokenizer::ScriptDataState);
15309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            else if (threadSafeMatch(tagName, styleTag)
15409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                || threadSafeMatch(tagName, iframeTag)
15509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                || threadSafeMatch(tagName, xmpTag)
15609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                || (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled)
15709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                || threadSafeMatch(tagName, noframesTag)
15809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                || (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))
159926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                tokenizer->setState(HTMLTokenizer::RAWTEXTState);
160926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
161926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
162926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
163926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (token.type() == HTMLToken::EndTag) {
16409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        const String& tagName = token.data();
165926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag))
166926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag))
167926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token))
168926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token)))
169926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_namespaceStack.removeLast();
17009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        if (threadSafeMatch(tagName, scriptTag)) {
171926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            if (!inForeignContent())
172926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                tokenizer->setState(HTMLTokenizer::DataState);
173926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            return false;
174926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
175926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
176926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
177926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // FIXME: Also setForceNullCharacterReplacement when in text mode.
178926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    tokenizer->setForceNullCharacterReplacement(inForeignContent());
179926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    tokenizer->setShouldAllowCDATA(inForeignContent());
180926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return true;
181926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
182926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
183926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
184