1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#ifndef HTMLTreeBuilder_h
28#define HTMLTreeBuilder_h
29
30#include "core/html/parser/HTMLConstructionSite.h"
31#include "core/html/parser/HTMLElementStack.h"
32#include "core/html/parser/HTMLParserOptions.h"
33#include "platform/heap/Handle.h"
34#include "wtf/Noncopyable.h"
35#include "wtf/PassOwnPtr.h"
36#include "wtf/PassRefPtr.h"
37#include "wtf/RefPtr.h"
38#include "wtf/Vector.h"
39#include "wtf/text/StringBuilder.h"
40#include "wtf/text/TextPosition.h"
41
42namespace blink {
43
// Forward declarations, so this header does not have to include the full class definitions.
// NOTE(review): Document, LocalFrame, HTMLToken and Node are not referenced anywhere else in
// this header; confirm no includer relies on them before removing any.
44class AtomicHTMLToken;
45class Document;
46class DocumentFragment;
47class Element;
48class LocalFrame;
49class HTMLToken;
50class HTMLDocument;
51class Node;
52class HTMLDocumentParser;
53
// HTMLTreeBuilder receives AtomicHTMLTokens through constructTree() and owns the
// HTMLConstructionSite (m_tree) together with the HTML5 insertion-mode state declared below.
// Instances are made through the two create() overloads: one takes an HTMLDocument, the other a
// DocumentFragment plus its context element. Only the inline members are defined in this
// header; every other member function is defined out of line.
54class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> {
55    WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
56public:
    // Creates a builder for an HTMLDocument. All arguments are forwarded unchanged to the
    // private constructor and the new object is wrapped with adoptPtrWillBeNoop().
57    static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
58    {
59        return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
60    }
    // Creates a builder for a DocumentFragment with |contextElement|. This overload takes no
    // reportErrors argument; otherwise it forwards to the matching private constructor in the
    // same way as the document overload.
61    static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
62    {
63        return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
64    }
65    ~HTMLTreeBuilder();
    // Takes a Visitor; defined out of line.
66    void trace(Visitor*);
67
    // Read-only access to the open-element stack held by the construction site (m_tree).
68    const HTMLElementStack* openElements() const { return m_tree.openElements(); }
69
    // True when the fragment context holds a non-null DocumentFragment.
70    bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
    // True when the open-element stack reports hasTemplateInHTMLScope().
71    bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
    // Logical OR of the two predicates above.
72    bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
73
    // Defined out of line.
    // NOTE(review): presumably pairs with m_isAttached (assert builds only) - confirm in the .cpp.
74    void detach();
75
    // Hands one token to the builder. Defined out of line.
76    void constructTree(AtomicHTMLToken*);
77
    // True while m_scriptToProcess is non-null.
78    bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
79    // Must be called to take the parser-blocking script before calling the parser again.
    // NOTE(review): |scriptStartPosition| is a non-const reference, so it looks like an out
    // parameter (presumably receiving m_scriptToProcessStartPosition) - confirm in the .cpp.
80    PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
81
82    // Done, close any open tags, etc.
83    void finished();
84
85    // Synchronously flush pending text and queued tasks, possibly creating more DOM nodes.
86    // Flushing pending text depends on |mode|.
    // This is a straight forward to HTMLConstructionSite::flush() on m_tree.
87    void flush(FlushMode mode) { m_tree.flush(mode); }
88
    // Plain setter for m_shouldSkipLeadingNewline.
89    void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
90
91private:
    // Nested helper type, only forward-declared here; it is passed by reference to the
    // processCharacterBuffer*() members below.
92    class CharacterTokenBuffer;
93    // Represents HTML5 "insertion mode"
94    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
    // NOTE(review): the enumerators have no explicit values, so their numeric values follow
    // declaration order; check out-of-view users before reordering or inserting entries.
95    enum InsertionMode {
96        InitialMode,
97        BeforeHTMLMode,
98        BeforeHeadMode,
99        InHeadMode,
100        InHeadNoscriptMode,
101        AfterHeadMode,
102        TemplateContentsMode,
103        InBodyMode,
104        TextMode,
105        InTableMode,
106        InTableTextMode,
107        InCaptionMode,
108        InColumnGroupMode,
109        InTableBodyMode,
110        InRowMode,
111        InCellMode,
112        InSelectMode,
113        InSelectInTableMode,
114        AfterBodyMode,
115        InFramesetMode,
116        AfterFramesetMode,
117        AfterAfterBodyMode,
118        AfterAfterFramesetMode,
119    };
120
    // Constructors are private; the public create() overloads are the only callers visible in
    // this header. The first is the document form, the second the fragment form.
121    HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
122    HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
123
    // Token handling. Declarations only; the names indicate one general handler plus one per
    // token type (doctype, start tag, end tag, comment, character, end of file).
124    void processToken(AtomicHTMLToken*);
125
126    void processDoctypeToken(AtomicHTMLToken*);
127    void processStartTag(AtomicHTMLToken*);
128    void processEndTag(AtomicHTMLToken*);
129    void processComment(AtomicHTMLToken*);
130    void processCharacter(AtomicHTMLToken*);
131    void processEndOfFile(AtomicHTMLToken*);
132
    // Start-tag / end-tag handlers named after the insertion mode they serve.
    // NOTE(review): the meaning of the bool returned by processStartTagForInHead() is not
    // visible in this header - confirm in the .cpp.
133    bool processStartTagForInHead(AtomicHTMLToken*);
134    void processStartTagForInBody(AtomicHTMLToken*);
135    void processStartTagForInTable(AtomicHTMLToken*);
136    void processEndTagForInBody(AtomicHTMLToken*);
137    void processEndTagForInTable(AtomicHTMLToken*);
138    void processEndTagForInTableBody(AtomicHTMLToken*);
139    void processEndTagForInRow(AtomicHTMLToken*);
140    void processEndTagForInCell(AtomicHTMLToken*);
141
    // Handlers named after one specific tag within one insertion mode. Several take no token
    // at all.
    // NOTE(review): the bool results are not documented here - confirm their meaning in the .cpp.
142    void processIsindexStartTagForInBody(AtomicHTMLToken*);
143    void processHtmlStartTagForInBody(AtomicHTMLToken*);
144    bool processBodyEndTagForInBody(AtomicHTMLToken*);
145    bool processTableEndTagForInTable();
146    bool processCaptionEndTagForInCaption();
147    bool processColgroupEndTagForInColumnGroup();
148    bool processTrEndTagForInRow();
149    // FIXME: This function should be inlined into its one call site or it
150    // needs to assert which tokens it can be called with.
151    void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
152
    // Character handling on a CharacterTokenBuffer (the nested type forward-declared above).
153    void processCharacterBuffer(CharacterTokenBuffer&);
154    inline void processCharacterBufferForInBody(CharacterTokenBuffer&);
155
    // "Fake" tag helpers: they take a tag name (QualifiedName or AtomicString) instead of an
    // AtomicHTMLToken. |attributes| defaults to an empty Vector<Attribute>.
156    void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
157    void processFakeEndTag(const QualifiedName&);
158    void processFakeEndTag(const AtomicString&);
159    void processFakePEndTagIfPInButtonScope();
160
161    void processGenericRCDATAStartTag(AtomicHTMLToken*);
162    void processGenericRawTextStartTag(AtomicHTMLToken*);
163    void processScriptStartTag(AtomicHTMLToken*);
164
165    // Default processing for the different insertion modes.
166    void defaultForInitial();
167    void defaultForBeforeHTML();
168    void defaultForBeforeHead();
169    void defaultForInHead();
170    void defaultForInHeadNoscript();
171    void defaultForAfterHead();
172    void defaultForInTableText();
173
    // Foreign-content support. Declared inline but defined out of line.
174    inline HTMLStackItem* adjustedCurrentStackItem() const;
175    inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
176    void processTokenInForeignContent(AtomicHTMLToken*);
177
    // Returns a Vector<Attribute> by value, built from the given token. The return type
    // matches the |attributes| parameter of processFakeStartTag().
178    Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
179
180    void callTheAdoptionAgency(AtomicHTMLToken*);
181
182    void closeTheCell();
183
    // The template argument is a function, not a type: a predicate that takes a
    // const HTMLStackItem* and returns bool, fixed at compile time.
184    template <bool shouldClose(const HTMLStackItem*)>
185    void processCloseWhenNestedTag(AtomicHTMLToken*);
186
187    void parseError(AtomicHTMLToken*);
188
    // Trivial accessors for m_insertionMode.
189    InsertionMode insertionMode() const { return m_insertionMode; }
190    void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
191
192    void resetInsertionModeAppropriately();
193
    // <template> handling.
    // NOTE(review): the meaning of the bool results is not visible here - confirm in the .cpp.
194    void processTemplateStartTag(AtomicHTMLToken*);
195    bool processTemplateEndTag(AtomicHTMLToken*);
196    bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
197
    // Holds the state specific to fragment parsing: the target DocumentFragment and an
    // HTMLStackItem for the context element. It is noncopyable and marked DISALLOW_ALLOCATION();
    // in this header it is only used as a by-value member (m_fragmentContext).
198    class FragmentParsingContext {
199        WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
200        DISALLOW_ALLOCATION();
201    public:
        // NOTE(review): the default constructor is presumably the non-fragment case, leaving
        // m_fragment null - confirm in the .cpp.
202        FragmentParsingContext();
203        FragmentParsingContext(DocumentFragment*, Element* contextElement);
204        ~FragmentParsingContext();
205
        // fragment() has no precondition; HTMLTreeBuilder::isParsingFragment() tests its
        // result for null. The two context-element accessors ASSERT(m_fragment) first, so
        // callers must check that a fragment is present before using them.
206        DocumentFragment* fragment() const { return m_fragment; }
207        Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); }
208        HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); }
209
210        void trace(Visitor*);
211
212    private:
213        RawPtrWillBeMember<DocumentFragment> m_fragment;
214        RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem;
215    };
216
    // Data members. C++ initializes non-static members in declaration order, and the
    // constructors are defined out of line, so keep this order in sync with their
    // initializer lists.
    // NOTE(review): m_framesetOk presumably mirrors the HTML5 "frameset-ok flag" - confirm.
217    bool m_framesetOk;
    // Present only in assert-enabled builds.
218#if ENABLE(ASSERT)
219    bool m_isAttached;
220#endif
221    FragmentParsingContext m_fragmentContext;
222    HTMLConstructionSite m_tree;
223
224    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
225    InsertionMode m_insertionMode;
226
227    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
228    InsertionMode m_originalInsertionMode;
229
    // NOTE(review): presumably the spec's "stack of template insertion modes" - confirm usage.
230    Vector<InsertionMode> m_templateInsertionModes;
231
232    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
233    StringBuilder m_pendingTableCharacters;
234
    // Written through setShouldSkipLeadingNewline(); it is not read anywhere in this header.
235    bool m_shouldSkipLeadingNewline;
236
237    // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
238    // from within parser actions. We also need it to track the current position.
239    RawPtrWillBeMember<HTMLDocumentParser> m_parser;
240
    // hasParserBlockingScript() reports whether m_scriptToProcess is set. The start position
    // is stored as a TextPosition, not a bare line number.
241    RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
242    TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
243
    // Stored by value.
244    HTMLParserOptions m_options;
245};
246
247}
248
249#endif
250