/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 * Copyright (C) 2011 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26
27#ifndef HTMLTreeBuilder_h
28#define HTMLTreeBuilder_h
29
30#include "core/html/parser/HTMLConstructionSite.h"
31#include "core/html/parser/HTMLElementStack.h"
32#include "core/html/parser/HTMLParserOptions.h"
33#include "platform/heap/Handle.h"
34#include "wtf/Noncopyable.h"
35#include "wtf/PassOwnPtr.h"
36#include "wtf/PassRefPtr.h"
37#include "wtf/RefPtr.h"
38#include "wtf/Vector.h"
39#include "wtf/text/StringBuilder.h"
40#include "wtf/text/TextPosition.h"
41
42namespace WebCore {
43
44class AtomicHTMLToken;
45class Document;
46class DocumentFragment;
47class Element;
48class LocalFrame;
49class HTMLToken;
50class HTMLDocument;
51class Node;
52class HTMLDocumentParser;
53
54class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> {
55    WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
56public:
57    static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
58    {
59        return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
60    }
61    static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
62    {
63        return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
64    }
65    ~HTMLTreeBuilder();
66    void trace(Visitor*);
67
68    const HTMLElementStack* openElements() const { return m_tree.openElements(); }
69
70    bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
71    bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
72    bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
73
74    void detach();
75
76    void constructTree(AtomicHTMLToken*);
77
78    bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
79    // Must be called to take the parser-blocking script before calling the parser again.
80    PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
81
82    // Done, close any open tags, etc.
83    void finished();
84
85    // Synchronously empty any queues, possibly creating more DOM nodes.
86    void flush() { m_tree.flush(); }
87
88    void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
89
90private:
91    class CharacterTokenBuffer;
92    // Represents HTML5 "insertion mode"
93    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
94    enum InsertionMode {
95        InitialMode,
96        BeforeHTMLMode,
97        BeforeHeadMode,
98        InHeadMode,
99        InHeadNoscriptMode,
100        AfterHeadMode,
101        TemplateContentsMode,
102        InBodyMode,
103        TextMode,
104        InTableMode,
105        InTableTextMode,
106        InCaptionMode,
107        InColumnGroupMode,
108        InTableBodyMode,
109        InRowMode,
110        InCellMode,
111        InSelectMode,
112        InSelectInTableMode,
113        AfterBodyMode,
114        InFramesetMode,
115        AfterFramesetMode,
116        AfterAfterBodyMode,
117        AfterAfterFramesetMode,
118    };
119
120    HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
121    HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
122
123    void processToken(AtomicHTMLToken*);
124
125    void processDoctypeToken(AtomicHTMLToken*);
126    void processStartTag(AtomicHTMLToken*);
127    void processEndTag(AtomicHTMLToken*);
128    void processComment(AtomicHTMLToken*);
129    void processCharacter(AtomicHTMLToken*);
130    void processEndOfFile(AtomicHTMLToken*);
131
132    bool processStartTagForInHead(AtomicHTMLToken*);
133    void processStartTagForInBody(AtomicHTMLToken*);
134    void processStartTagForInTable(AtomicHTMLToken*);
135    void processEndTagForInBody(AtomicHTMLToken*);
136    void processEndTagForInTable(AtomicHTMLToken*);
137    void processEndTagForInTableBody(AtomicHTMLToken*);
138    void processEndTagForInRow(AtomicHTMLToken*);
139    void processEndTagForInCell(AtomicHTMLToken*);
140
141    void processIsindexStartTagForInBody(AtomicHTMLToken*);
142    void processHtmlStartTagForInBody(AtomicHTMLToken*);
143    bool processBodyEndTagForInBody(AtomicHTMLToken*);
144    bool processTableEndTagForInTable();
145    bool processCaptionEndTagForInCaption();
146    bool processColgroupEndTagForInColumnGroup();
147    bool processTrEndTagForInRow();
148    // FIXME: This function should be inlined into its one call site or it
149    // needs to assert which tokens it can be called with.
150    void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
151
152    void processCharacterBuffer(CharacterTokenBuffer&);
153    inline void processCharacterBufferForInBody(CharacterTokenBuffer&);
154
155    void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
156    void processFakeEndTag(const QualifiedName&);
157    void processFakeEndTag(const AtomicString&);
158    void processFakePEndTagIfPInButtonScope();
159
160    void processGenericRCDATAStartTag(AtomicHTMLToken*);
161    void processGenericRawTextStartTag(AtomicHTMLToken*);
162    void processScriptStartTag(AtomicHTMLToken*);
163
164    // Default processing for the different insertion modes.
165    void defaultForInitial();
166    void defaultForBeforeHTML();
167    void defaultForBeforeHead();
168    void defaultForInHead();
169    void defaultForInHeadNoscript();
170    void defaultForAfterHead();
171    void defaultForInTableText();
172
173    inline HTMLStackItem* adjustedCurrentStackItem() const;
174    inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
175    void processTokenInForeignContent(AtomicHTMLToken*);
176
177    Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
178
179    void callTheAdoptionAgency(AtomicHTMLToken*);
180
181    void closeTheCell();
182
183    template <bool shouldClose(const HTMLStackItem*)>
184    void processCloseWhenNestedTag(AtomicHTMLToken*);
185
186    void parseError(AtomicHTMLToken*);
187
188    InsertionMode insertionMode() const { return m_insertionMode; }
189    void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
190
191    void resetInsertionModeAppropriately();
192
193    void processTemplateStartTag(AtomicHTMLToken*);
194    bool processTemplateEndTag(AtomicHTMLToken*);
195    bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
196
197    class FragmentParsingContext {
198        WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
199        DISALLOW_ALLOCATION();
200    public:
201        FragmentParsingContext();
202        FragmentParsingContext(DocumentFragment*, Element* contextElement);
203        ~FragmentParsingContext();
204
205        DocumentFragment* fragment() const { return m_fragment; }
206        Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); }
207        HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); }
208
209        void trace(Visitor*);
210
211    private:
212        RawPtrWillBeMember<DocumentFragment> m_fragment;
213        RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem;
214    };
215
216    bool m_framesetOk;
217#ifndef NDEBUG
218    bool m_isAttached;
219#endif
220    FragmentParsingContext m_fragmentContext;
221    HTMLConstructionSite m_tree;
222
223    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
224    InsertionMode m_insertionMode;
225
226    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
227    InsertionMode m_originalInsertionMode;
228
229    Vector<InsertionMode> m_templateInsertionModes;
230
231    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
232    StringBuilder m_pendingTableCharacters;
233
234    bool m_shouldSkipLeadingNewline;
235
236    // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
237    // from within parser actions. We also need it to track the current position.
238    RawPtrWillBeMember<HTMLDocumentParser> m_parser;
239
240    RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
241    TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
242
243    HTMLParserOptions m_options;
244};
245
246}
247
248#endif
249