/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 * Copyright (C) 2011 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */ 26 27#ifndef HTMLTreeBuilder_h 28#define HTMLTreeBuilder_h 29 30#include "core/html/parser/HTMLConstructionSite.h" 31#include "core/html/parser/HTMLElementStack.h" 32#include "core/html/parser/HTMLParserOptions.h" 33#include "platform/heap/Handle.h" 34#include "wtf/Noncopyable.h" 35#include "wtf/PassOwnPtr.h" 36#include "wtf/PassRefPtr.h" 37#include "wtf/RefPtr.h" 38#include "wtf/Vector.h" 39#include "wtf/text/StringBuilder.h" 40#include "wtf/text/TextPosition.h" 41 42namespace WebCore { 43 44class AtomicHTMLToken; 45class Document; 46class DocumentFragment; 47class Element; 48class LocalFrame; 49class HTMLToken; 50class HTMLDocument; 51class Node; 52class HTMLDocumentParser; 53 54class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> { 55 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED; 56public: 57 static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options) 58 { 59 return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options)); 60 } 61 static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options) 62 { 63 return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options)); 64 } 65 ~HTMLTreeBuilder(); 66 void trace(Visitor*); 67 68 const HTMLElementStack* openElements() const { return m_tree.openElements(); } 69 70 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); } 71 bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); } 72 bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); } 73 74 void detach(); 
75 76 void constructTree(AtomicHTMLToken*); 77 78 bool hasParserBlockingScript() const { return !!m_scriptToProcess; } 79 // Must be called to take the parser-blocking script before calling the parser again. 80 PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition); 81 82 // Done, close any open tags, etc. 83 void finished(); 84 85 // Synchronously empty any queues, possibly creating more DOM nodes. 86 void flush() { m_tree.flush(); } 87 88 void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; } 89 90private: 91 class CharacterTokenBuffer; 92 // Represents HTML5 "insertion mode" 93 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 94 enum InsertionMode { 95 InitialMode, 96 BeforeHTMLMode, 97 BeforeHeadMode, 98 InHeadMode, 99 InHeadNoscriptMode, 100 AfterHeadMode, 101 TemplateContentsMode, 102 InBodyMode, 103 TextMode, 104 InTableMode, 105 InTableTextMode, 106 InCaptionMode, 107 InColumnGroupMode, 108 InTableBodyMode, 109 InRowMode, 110 InCellMode, 111 InSelectMode, 112 InSelectInTableMode, 113 AfterBodyMode, 114 InFramesetMode, 115 AfterFramesetMode, 116 AfterAfterBodyMode, 117 AfterAfterFramesetMode, 118 }; 119 120 HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&); 121 HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&); 122 123 void processToken(AtomicHTMLToken*); 124 125 void processDoctypeToken(AtomicHTMLToken*); 126 void processStartTag(AtomicHTMLToken*); 127 void processEndTag(AtomicHTMLToken*); 128 void processComment(AtomicHTMLToken*); 129 void processCharacter(AtomicHTMLToken*); 130 void processEndOfFile(AtomicHTMLToken*); 131 132 bool processStartTagForInHead(AtomicHTMLToken*); 133 void processStartTagForInBody(AtomicHTMLToken*); 134 void processStartTagForInTable(AtomicHTMLToken*); 135 void 
processEndTagForInBody(AtomicHTMLToken*); 136 void processEndTagForInTable(AtomicHTMLToken*); 137 void processEndTagForInTableBody(AtomicHTMLToken*); 138 void processEndTagForInRow(AtomicHTMLToken*); 139 void processEndTagForInCell(AtomicHTMLToken*); 140 141 void processIsindexStartTagForInBody(AtomicHTMLToken*); 142 void processHtmlStartTagForInBody(AtomicHTMLToken*); 143 bool processBodyEndTagForInBody(AtomicHTMLToken*); 144 bool processTableEndTagForInTable(); 145 bool processCaptionEndTagForInCaption(); 146 bool processColgroupEndTagForInColumnGroup(); 147 bool processTrEndTagForInRow(); 148 // FIXME: This function should be inlined into its one call site or it 149 // needs to assert which tokens it can be called with. 150 void processAnyOtherEndTagForInBody(AtomicHTMLToken*); 151 152 void processCharacterBuffer(CharacterTokenBuffer&); 153 inline void processCharacterBufferForInBody(CharacterTokenBuffer&); 154 155 void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>()); 156 void processFakeEndTag(const QualifiedName&); 157 void processFakeEndTag(const AtomicString&); 158 void processFakePEndTagIfPInButtonScope(); 159 160 void processGenericRCDATAStartTag(AtomicHTMLToken*); 161 void processGenericRawTextStartTag(AtomicHTMLToken*); 162 void processScriptStartTag(AtomicHTMLToken*); 163 164 // Default processing for the different insertion modes. 
165 void defaultForInitial(); 166 void defaultForBeforeHTML(); 167 void defaultForBeforeHead(); 168 void defaultForInHead(); 169 void defaultForInHeadNoscript(); 170 void defaultForAfterHead(); 171 void defaultForInTableText(); 172 173 inline HTMLStackItem* adjustedCurrentStackItem() const; 174 inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*); 175 void processTokenInForeignContent(AtomicHTMLToken*); 176 177 Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*); 178 179 void callTheAdoptionAgency(AtomicHTMLToken*); 180 181 void closeTheCell(); 182 183 template <bool shouldClose(const HTMLStackItem*)> 184 void processCloseWhenNestedTag(AtomicHTMLToken*); 185 186 void parseError(AtomicHTMLToken*); 187 188 InsertionMode insertionMode() const { return m_insertionMode; } 189 void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; } 190 191 void resetInsertionModeAppropriately(); 192 193 void processTemplateStartTag(AtomicHTMLToken*); 194 bool processTemplateEndTag(AtomicHTMLToken*); 195 bool processEndOfFileForInTemplateContents(AtomicHTMLToken*); 196 197 class FragmentParsingContext { 198 WTF_MAKE_NONCOPYABLE(FragmentParsingContext); 199 DISALLOW_ALLOCATION(); 200 public: 201 FragmentParsingContext(); 202 FragmentParsingContext(DocumentFragment*, Element* contextElement); 203 ~FragmentParsingContext(); 204 205 DocumentFragment* fragment() const { return m_fragment; } 206 Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); } 207 HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); } 208 209 void trace(Visitor*); 210 211 private: 212 RawPtrWillBeMember<DocumentFragment> m_fragment; 213 RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem; 214 }; 215 216 bool m_framesetOk; 217#ifndef NDEBUG 218 bool m_isAttached; 219#endif 220 FragmentParsingContext m_fragmentContext; 221 HTMLConstructionSite m_tree; 222 223 // 
http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 224 InsertionMode m_insertionMode; 225 226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode 227 InsertionMode m_originalInsertionMode; 228 229 Vector<InsertionMode> m_templateInsertionModes; 230 231 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens 232 StringBuilder m_pendingTableCharacters; 233 234 bool m_shouldSkipLeadingNewline; 235 236 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer 237 // from within parser actions. We also need it to track the current position. 238 RawPtrWillBeMember<HTMLDocumentParser> m_parser; 239 240 RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. 241 TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing. 242 243 HTMLParserOptions m_options; 244}; 245 246} 247 248#endif 249