1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#ifndef HTMLTreeBuilder_h
28#define HTMLTreeBuilder_h
29
30#include "Element.h"
31#include "FragmentScriptingPermission.h"
32#include "HTMLConstructionSite.h"
33#include "HTMLElementStack.h"
34#include "HTMLFormattingElementList.h"
35#include "HTMLTokenizer.h"
36#include <wtf/text/TextPosition.h>
37#include <wtf/Noncopyable.h>
38#include <wtf/OwnPtr.h>
39#include <wtf/PassOwnPtr.h>
40#include <wtf/PassRefPtr.h>
41#include <wtf/RefPtr.h>
42#include <wtf/unicode/Unicode.h>
43
44namespace WebCore {
45
// Forward declarations. Where the declarations in this header use these
// types, they do so only through pointers or references.
46class AtomicHTMLToken;
47class Document;
48class DocumentFragment;
49class Frame;
50class HTMLToken;
51class HTMLDocument;
52class Node;
53class HTMLDocumentParser;
54
// HTMLTreeBuilder is handed tokens through constructTreeFromToken() /
// constructTreeFromAtomicToken() and keeps track of the HTML5 "insertion
// mode" (see InsertionMode below). An instance is created either for an
// HTMLDocument or for a DocumentFragment together with a context element;
// both paths go through the static create() factories because the
// constructors are private.
55class HTMLTreeBuilder {
56    WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED;
57public:
    // Factory for the document case: constructs a builder with the given
    // arguments and hands it back wrapped by adoptPtr().
58    static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
59    {
60        return adoptPtr(new HTMLTreeBuilder(parser, document, reportErrors, usePreHTML5ParserQuirks));
61    }
    // Factory for the fragment case: same as above, but forwards the
    // fragment, its context element and the scripting permission instead of
    // a document and an error-reporting flag.
62    static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
63    {
64        return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks));
65    }
66    ~HTMLTreeBuilder();
67
    // True when m_fragmentContext holds a non-null fragment pointer.
68    bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
69
70    void detach();
71
    // Plain setter/getter for the m_isPaused flag.
72    void setPaused(bool paused) { m_isPaused = paused; }
73    bool isPaused() const { return m_isPaused; }
74
75    // The token really should be passed as a const& since it's never modified.
76    void constructTreeFromToken(HTMLToken&);
77    void constructTreeFromAtomicToken(AtomicHTMLToken&);
78
79    // Must be called when parser is paused before calling the parser again.
    // NOTE(review): scriptStartPosition is a non-const reference, presumably
    // an out-parameter filled from m_scriptToProcessStartPosition -- confirm
    // against the definition in the .cpp file.
80    PassRefPtr<Element> takeScriptToProcess(TextPosition1& scriptStartPosition);
81
82    // Done, close any open tags, etc.
83    void finished();
84
85    static bool scriptEnabled(Frame*);
86    static bool pluginsEnabled(Frame*);
87
88private:
    // Helper classes that are only declared here; their definitions are not
    // part of this header.
89    class FakeInsertionMode;
90    class ExternalCharacterTokenBuffer;
91    // Represents HTML5 "insertion mode"
92    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
93    enum InsertionMode {
94        InitialMode,
95        BeforeHTMLMode,
96        BeforeHeadMode,
97        InHeadMode,
98        InHeadNoscriptMode,
99        AfterHeadMode,
100        InBodyMode,
101        TextMode,
102        InTableMode,
103        InTableTextMode,
104        InCaptionMode,
105        InColumnGroupMode,
106        InTableBodyMode,
107        InRowMode,
108        InCellMode,
109        InSelectMode,
110        InSelectInTableMode,
111        InForeignContentMode,
112        AfterBodyMode,
113        InFramesetMode,
114        AfterFramesetMode,
115        AfterAfterBodyMode,
116        AfterAfterFramesetMode,
117    };
118
    // Private constructors; callers outside the class use the create()
    // factories above, which take the same argument lists.
119    HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks);
120    HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks);
121
122    void processToken(AtomicHTMLToken&);
123
    // One handler per kind of token.
124    void processDoctypeToken(AtomicHTMLToken&);
125    void processStartTag(AtomicHTMLToken&);
126    void processEndTag(AtomicHTMLToken&);
127    void processComment(AtomicHTMLToken&);
128    void processCharacter(AtomicHTMLToken&);
129    void processEndOfFile(AtomicHTMLToken&);
130
    // Start-tag / end-tag handlers named after the insertion mode they serve.
    // Note that processStartTagForInHead returns bool while its siblings
    // return void.
131    bool processStartTagForInHead(AtomicHTMLToken&);
132    void processStartTagForInBody(AtomicHTMLToken&);
133    void processStartTagForInTable(AtomicHTMLToken&);
134    void processEndTagForInBody(AtomicHTMLToken&);
135    void processEndTagForInTable(AtomicHTMLToken&);
136    void processEndTagForInTableBody(AtomicHTMLToken&);
137    void processEndTagForInRow(AtomicHTMLToken&);
138    void processEndTagForInCell(AtomicHTMLToken&);
139
140    void processIsindexStartTagForInBody(AtomicHTMLToken&);
141    bool processBodyEndTagForInBody(AtomicHTMLToken&);
142    bool processTableEndTagForInTable();
143    bool processCaptionEndTagForInCaption();
144    bool processColgroupEndTagForInColumnGroup();
145    bool processTrEndTagForInRow();
146    // FIXME: This function should be inlined into its one call site or it
147    // needs to assert which tokens it can be called with.
148    void processAnyOtherEndTagForInBody(AtomicHTMLToken&);
149
150    void processCharacterBuffer(ExternalCharacterTokenBuffer&);
151
    // "Fake" variants take a tag name or string directly rather than an
    // AtomicHTMLToken. The attributes argument of processFakeStartTag
    // defaults to 0 (no attribute map).
152    void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0);
153    void processFakeEndTag(const QualifiedName&);
154    void processFakeCharacters(const String&);
155    void processFakePEndTagIfPInButtonScope();
156
157    void processGenericRCDATAStartTag(AtomicHTMLToken&);
158    void processGenericRawTextStartTag(AtomicHTMLToken&);
159    void processScriptStartTag(AtomicHTMLToken&);
160
161    // Default processing for the different insertion modes.
162    void defaultForInitial();
163    void defaultForBeforeHTML();
164    void defaultForBeforeHead();
165    void defaultForInHead();
166    void defaultForInHeadNoscript();
167    void defaultForAfterHead();
168    void defaultForInTableText();
169
170    void prepareToReprocessToken();
171
172    void reprocessStartTag(AtomicHTMLToken&);
173    void reprocessEndTag(AtomicHTMLToken&);
174
175    PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&);
176
177    HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*);
178    void callTheAdoptionAgency(AtomicHTMLToken&);
179
180    void closeTheCell();
181
    // shouldClose is a non-type template parameter: a function taking a
    // const ContainerNode* and returning bool, chosen at compile time.
182    template <bool shouldClose(const ContainerNode*)>
183    void processCloseWhenNestedTag(AtomicHTMLToken&);
184
    // NOTE(review): this data member is declared here, among the member
    // functions and ahead of the other data members. Member declaration
    // order determines construction order, so do not move it without
    // checking the constructors' initializer lists.
185    bool m_framesetOk;
186
187    void parseError(AtomicHTMLToken&);
188
    // Accessors for the current insertion mode. setInsertionMode() also
    // clears the "fake" flag, so a real mode change always ends a fake one.
189    InsertionMode insertionMode() const { return m_insertionMode; }
190    void setInsertionMode(InsertionMode mode)
191    {
192        m_insertionMode = mode;
193        m_isFakeInsertionMode = false;
194    }
195
    // setFakeInsertionMode() stores the mode like setInsertionMode() but
    // raises the "fake" flag instead of clearing it.
    // NOTE(review): isFakeInsertionMode() is not declared const even though
    // it only reads a member (compare insertionMode() above).
196    bool isFakeInsertionMode() { return m_isFakeInsertionMode; }
197    void setFakeInsertionMode(InsertionMode mode)
198    {
199        m_insertionMode = mode;
200        m_isFakeInsertionMode = true;
201    }
202
203    void resetInsertionModeAppropriately();
204
205    void processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token);
206    void resetForeignInsertionMode();
207
    // Groups the three values supplied when the builder is created for a
    // fragment: the fragment, its context element and the scripting
    // permission. The pointers are stored as raw pointers. fragment() may be
    // called at any time (isParsingFragment() tests it for null); the other
    // two accessors ASSERT that a fragment is set before returning.
208    class FragmentParsingContext {
209        WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
210    public:
211        FragmentParsingContext();
212        FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
213        ~FragmentParsingContext();
214
215        DocumentFragment* fragment() const { return m_fragment; }
216        Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
217        FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; }
218
219    private:
220        DocumentFragment* m_fragment;
221        Element* m_contextElement;
222
223        // FragmentScriptingNotAllowed causes the Parser to remove children
224        // from <script> tags (so javascript doesn't show up in pastes).
225        FragmentScriptingPermission m_scriptingPermission;
226    };
227
228    FragmentParsingContext m_fragmentContext;
229
230    Document* m_document;
231    HTMLConstructionSite m_tree;
232
    // m_isPaused backs setPaused()/isPaused(); m_isFakeInsertionMode is
    // written by setInsertionMode()/setFakeInsertionMode().
233    bool m_reportErrors;
234    bool m_isPaused;
235    bool m_isFakeInsertionMode;
236
237    // FIXME: InsertionModes should be a separate object to prevent direct
238    // manipulation of these variables.  For now, be careful to always use
239    // setInsertionMode and never set m_insertionMode directly.
240    InsertionMode m_insertionMode;
241    InsertionMode m_originalInsertionMode;
242
243    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
244    Vector<UChar> m_pendingTableCharacters;
245
246    // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
247    // from within parser actions. We also need it to track the current position.
248    HTMLDocumentParser* m_parser;
249
250    RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
251    TextPosition1 m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
252
253    // FIXME: We probably want to remove this member.  Originally, it was
254    // created to service the legacy tree builder, but it seems to be used for
255    // some other things now.
    // Note the type is TextPosition0, whereas m_scriptToProcessStartPosition
    // above is a TextPosition1.
256    TextPosition0 m_lastScriptElementStartPosition;
257
258    bool m_usePreHTML5ParserQuirks;
259
260    bool m_hasPendingForeignInsertionModeSteps;
261};
262
263}
264
265#endif
266