/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25
26#ifndef HTMLDocumentParser_h
27#define HTMLDocumentParser_h
28
29#include "core/dom/ParserContentPolicy.h"
30#include "core/dom/ScriptableDocumentParser.h"
31#include "core/fetch/ResourceClient.h"
32#include "core/frame/UseCounter.h"
33#include "core/html/parser/BackgroundHTMLInputStream.h"
34#include "core/html/parser/CompactHTMLToken.h"
35#include "core/html/parser/HTMLInputStream.h"
36#include "core/html/parser/HTMLParserOptions.h"
37#include "core/html/parser/HTMLPreloadScanner.h"
38#include "core/html/parser/HTMLScriptRunnerHost.h"
39#include "core/html/parser/HTMLSourceTracker.h"
40#include "core/html/parser/HTMLToken.h"
41#include "core/html/parser/HTMLTokenizer.h"
42#include "core/html/parser/HTMLTreeBuilderSimulator.h"
43#include "core/html/parser/TextResourceDecoder.h"
44#include "core/html/parser/XSSAuditor.h"
45#include "core/html/parser/XSSAuditorDelegate.h"
46#include "platform/text/SegmentedString.h"
47#include "wtf/Deque.h"
48#include "wtf/OwnPtr.h"
49#include "wtf/WeakPtr.h"
50#include "wtf/text/TextPosition.h"
51
52namespace blink {
53
// Forward declarations for types that this header names only by pointer,
// by reference, or as a smart-pointer template argument.
// NOTE(review): ScriptController and ScriptSourceCode are not referenced
// anywhere else in the visible part of this header - confirm whether they
// are still needed before removing them.
class BackgroundHTMLParser;
class CompactHTMLToken;
class Document;
class DocumentFragment;
class HTMLDocument;
class HTMLParserScheduler;
class HTMLScriptRunner;
class HTMLTreeBuilder;
class HTMLResourcePreloader;
class ScriptController;
class ScriptSourceCode;

// Taken by reference in HTMLDocumentParser::canTakeNextToken().
class PumpSession;
67
68class HTMLDocumentParser :  public ScriptableDocumentParser, private HTMLScriptRunnerHost {
69    WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
70    WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser);
71public:
72    static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(HTMLDocument& document, bool reportErrors)
73    {
74        return adoptRefWillBeNoop(new HTMLDocumentParser(document, reportErrors));
75    }
76    virtual ~HTMLDocumentParser();
77    virtual void trace(Visitor*) OVERRIDE;
78
79    // Exposed for HTMLParserScheduler
80    void resumeParsingAfterYield();
81
82    static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent);
83
84    HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
85
86    virtual TextPosition textPosition() const OVERRIDE FINAL;
87    virtual OrdinalNumber lineNumber() const OVERRIDE FINAL;
88
89    virtual void suspendScheduledTasks() OVERRIDE FINAL;
90    virtual void resumeScheduledTasks() OVERRIDE FINAL;
91
92    struct ParsedChunk {
93        OwnPtr<CompactHTMLTokenStream> tokens;
94        PreloadRequestStream preloads;
95        XSSInfoStream xssInfos;
96        HTMLTokenizer::State tokenizerState;
97        HTMLTreeBuilderSimulator::State treeBuilderState;
98        HTMLInputCheckpoint inputCheckpoint;
99        TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
100    };
101    void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
102    void didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&);
103
104    virtual void appendBytes(const char* bytes, size_t length) OVERRIDE;
105    virtual void flush() OVERRIDE FINAL;
106    virtual void setDecoder(PassOwnPtr<TextResourceDecoder>) OVERRIDE FINAL;
107
108    UseCounter* useCounter() { return UseCounter::getFrom(contextForParsingSession()); }
109
110protected:
111    virtual void insert(const SegmentedString&) OVERRIDE FINAL;
112    virtual void append(PassRefPtr<StringImpl>) OVERRIDE;
113    virtual void finish() OVERRIDE FINAL;
114
115    HTMLDocumentParser(HTMLDocument&, bool reportErrors);
116    HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy);
117
118    HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
119
120    void forcePlaintextForTextDocument();
121
122private:
123    static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
124    {
125        return adoptRefWillBeNoop(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy));
126    }
127
128    // DocumentParser
129    virtual void pinToMainThread() OVERRIDE FINAL;
130    virtual void detach() OVERRIDE FINAL;
131    virtual bool hasInsertionPoint() OVERRIDE FINAL;
132    virtual bool processingData() const OVERRIDE FINAL;
133    virtual void prepareToStopParsing() OVERRIDE FINAL;
134    virtual void stopParsing() OVERRIDE FINAL;
135    virtual bool isWaitingForScripts() const OVERRIDE FINAL;
136    virtual bool isExecutingScript() const OVERRIDE FINAL;
137    virtual void executeScriptsWaitingForResources() OVERRIDE FINAL;
138
139    // HTMLScriptRunnerHost
140    virtual void notifyScriptLoaded(Resource*) OVERRIDE FINAL;
141    virtual HTMLInputStream& inputStream() OVERRIDE FINAL { return m_input; }
142    virtual bool hasPreloadScanner() const OVERRIDE FINAL { return m_preloadScanner.get() && !shouldUseThreading(); }
143    virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE FINAL;
144
145    void startBackgroundParser();
146    void stopBackgroundParser();
147    void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk);
148    void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
149    void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
150    void pumpPendingSpeculations();
151
152    Document* contextForParsingSession();
153
154    enum SynchronousMode {
155        AllowYield,
156        ForceSynchronous,
157    };
158    bool canTakeNextToken(SynchronousMode, PumpSession&);
159    void pumpTokenizer(SynchronousMode);
160    void pumpTokenizerIfPossible(SynchronousMode);
161    void constructTreeFromHTMLToken(HTMLToken&);
162    void constructTreeFromCompactHTMLToken(const CompactHTMLToken&);
163
164    void runScriptsForPausedTreeBuilder();
165    void resumeParsingAfterScriptExecution();
166
167    void attemptToEnd();
168    void endIfDelayed();
169    void attemptToRunDeferredScriptsAndEnd();
170    void end();
171
172    bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; }
173
174    bool isParsingFragment() const;
175    bool isScheduledForResume() const;
176    bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; }
177    bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); }
178
179    HTMLToken& token() { return *m_token; }
180
181    HTMLParserOptions m_options;
182    HTMLInputStream m_input;
183
184    OwnPtr<HTMLToken> m_token;
185    OwnPtr<HTMLTokenizer> m_tokenizer;
186    OwnPtrWillBeMember<HTMLScriptRunner> m_scriptRunner;
187    OwnPtrWillBeMember<HTMLTreeBuilder> m_treeBuilder;
188    OwnPtr<HTMLPreloadScanner> m_preloadScanner;
189    OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner;
190    OwnPtr<HTMLParserScheduler> m_parserScheduler;
191    HTMLSourceTracker m_sourceTracker;
192    TextPosition m_textPosition;
193    XSSAuditor m_xssAuditor;
194    XSSAuditorDelegate m_xssAuditorDelegate;
195
196    // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object
197    // so they can be set and cleared together and passed between threads together.
198    OwnPtr<ParsedChunk> m_lastChunkBeforeScript;
199    Deque<OwnPtr<ParsedChunk> > m_speculations;
200    WeakPtrFactory<HTMLDocumentParser> m_weakFactory;
201    WeakPtr<BackgroundHTMLParser> m_backgroundParser;
202    OwnPtrWillBeMember<HTMLResourcePreloader> m_preloader;
203
204    bool m_isPinnedToMainThread;
205    bool m_endWasDelayed;
206    bool m_haveBackgroundParser;
207    unsigned m_pumpSessionNestingLevel;
208};
209
210}
211
212#endif
213