1/*
2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef BackgroundHTMLParser_h
27#define BackgroundHTMLParser_h
28
29#include "core/dom/DocumentEncodingData.h"
30#include "core/html/parser/BackgroundHTMLInputStream.h"
31#include "core/html/parser/CompactHTMLToken.h"
32#include "core/html/parser/HTMLParserOptions.h"
33#include "core/html/parser/HTMLPreloadScanner.h"
34#include "core/html/parser/HTMLSourceTracker.h"
35#include "core/html/parser/HTMLTreeBuilderSimulator.h"
36#include "core/html/parser/TextResourceDecoder.h"
37#include "core/html/parser/XSSAuditorDelegate.h"
38#include "wtf/PassOwnPtr.h"
39#include "wtf/WeakPtr.h"
40
41namespace blink {
42
// Forward declarations. Within this header, HTMLDocumentParser and XSSAuditor
// appear only as smart-pointer template arguments (WeakPtr<> / OwnPtr<>).
// NOTE(review): SharedBuffer is not referenced anywhere else in the visible
// part of this header -- confirm whether the declaration is still needed.
43class HTMLDocumentParser;
44class SharedBuffer;
45class XSSAuditor;
46
// HTML parser object that accepts raw input bytes (appendRawBytesFrom*), an
// optional replacement decoder (setDecoder) and control requests (flush,
// resumeFrom, finish, stop). It owns a tokenizer, a tree-builder simulator,
// an XSS auditor, a preload scanner and a text decoder, and keeps queues of
// pending tokens, preload requests and XSS infos.
//
// The constructor and destructor are private; start() is the only public
// member that takes the constructor's arguments, so instances are presumably
// created through start() -- confirm against the implementation file.
//
// NOTE(review): the class name and method names such as
// appendRawBytesFromParserThread / sendTokensToMainThread suggest this object
// lives off the main thread; the actual threading contract is not visible in
// this header.
47class BackgroundHTMLParser {
48    WTF_MAKE_FAST_ALLOCATED;
49public:
    // Bundle of construction-time inputs, handed over by PassOwnPtr to start()
    // and to the private constructor.
50    struct Configuration {
        // Parser options; the class keeps its own HTMLParserOptions in m_options.
51        HTMLParserOptions options;
        // Weak (non-owning) handle to the HTMLDocumentParser.
52        WeakPtr<HTMLDocumentParser> parser;
        // Owned helper objects; the class has matching OwnPtr members
        // (m_xssAuditor, m_preloadScanner, m_decoder).
53        OwnPtr<XSSAuditor> xssAuditor;
54        OwnPtr<TokenPreloadScanner> preloadScanner;
55        OwnPtr<TextResourceDecoder> decoder;
56    };
57
    // Static entry point taking the same arguments as the private constructor:
    // a weak reference for this parser and its Configuration.
58    static void start(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>);
59
    // Snapshot of parsing state passed to resumeFrom(). Carries a token, a
    // tokenizer, tree-builder simulator state, input and preload-scanner
    // checkpoints, plus a string of input that has not been parsed yet.
60    struct Checkpoint {
61        WeakPtr<HTMLDocumentParser> parser;
62        OwnPtr<HTMLToken> token;
63        OwnPtr<HTMLTokenizer> tokenizer;
64        HTMLTreeBuilderSimulator::State treeBuilderState;
65        HTMLInputCheckpoint inputCheckpoint;
66        TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
67        String unparsedInput;
68    };
69
    // Appends dataLength raw (undecoded) bytes starting at data. The buffer is
    // passed as a plain pointer, so no ownership is transferred.
    // NOTE(review): per the name, intended to be called on the parser thread.
70    void appendRawBytesFromParserThread(const char* data, int dataLength);
71
    // Appends raw bytes whose buffer ownership is transferred via PassOwnPtr.
    // NOTE(review): per the name, this is the main-thread counterpart of the
    // method above -- confirm how the call is dispatched.
72    void appendRawBytesFromMainThread(PassOwnPtr<Vector<char> >);
    // Takes ownership of a TextResourceDecoder; presumably replaces m_decoder.
73    void setDecoder(PassOwnPtr<TextResourceDecoder>);
74    void flush();
    // Takes ownership of a Checkpoint (see the struct above) to resume from.
75    void resumeFrom(PassOwnPtr<Checkpoint>);
76    void startedChunkWithCheckpoint(HTMLInputCheckpoint);
77    void finish();
78    void stop();
79
80    void forcePlaintextForTextDocument();
81
82private:
    // Private: outside code cannot construct or destroy instances directly.
83    BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>);
84    ~BackgroundHTMLParser();
85
    // Internal helpers; their behavior is defined in the implementation file.
86    void appendDecodedBytes(const String&);
87    void markEndOfFile();
88    void pumpTokenizer();
89    void sendTokensToMainThread();
90    void updateDocument(const String& decodedData);
91
    // NOTE: data members are constructed in declaration order and destroyed in
    // reverse order, so reordering them is not a purely cosmetic change.
92    WeakPtrFactory<BackgroundHTMLParser> m_weakFactory;
93    BackgroundHTMLInputStream m_input;
94    HTMLSourceTracker m_sourceTracker;
95    OwnPtr<HTMLToken> m_token;
96    OwnPtr<HTMLTokenizer> m_tokenizer;
97    HTMLTreeBuilderSimulator m_treeBuilderSimulator;
98    HTMLParserOptions m_options;
    // Weak (non-owning) handle to the HTMLDocumentParser.
99    WeakPtr<HTMLDocumentParser> m_parser;
100
    // Output queues; presumably drained by sendTokensToMainThread() -- confirm.
101    OwnPtr<CompactHTMLTokenStream> m_pendingTokens;
102    PreloadRequestStream m_pendingPreloads;
103    XSSInfoStream m_pendingXSSInfos;
104
    // Owned helpers with the same types as the Configuration fields.
105    OwnPtr<XSSAuditor> m_xssAuditor;
106    OwnPtr<TokenPreloadScanner> m_preloadScanner;
107    OwnPtr<TextResourceDecoder> m_decoder;
108    DocumentEncodingData m_lastSeenEncodingData;
109};
110
111}
112
113#endif
114