1926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)/*
2926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *
4926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Redistribution and use in source and binary forms, with or without
5926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * modification, are permitted provided that the following conditions
6926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * are met:
7926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 1. Redistributions of source code must retain the above copyright
8926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer.
9926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright
10926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer in the
11926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    documentation and/or other materials provided with the distribution.
12926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *
13926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) */
25926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
26926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#ifndef BackgroundHTMLParser_h
27926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#define BackgroundHTMLParser_h
28926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
295d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/dom/DocumentEncodingData.h"
3053e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/BackgroundHTMLInputStream.h"
3153e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/CompactHTMLToken.h"
3253e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLParserOptions.h"
3353e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLPreloadScanner.h"
3453e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLSourceTracker.h"
3553e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLTreeBuilderSimulator.h"
365d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/html/parser/TextResourceDecoder.h"
3753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/XSSAuditorDelegate.h"
38591b958dee2cf159d33a0b931e6231072eaf38d5Ben Murdoch#include "wtf/PassOwnPtr.h"
39591b958dee2cf159d33a0b931e6231072eaf38d5Ben Murdoch#include "wtf/WeakPtr.h"
40926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
41c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink {
42926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
43926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)class HTMLDocumentParser;
4409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)class SharedBuffer;
45926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)class XSSAuditor;
46926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// BackgroundHTMLParser: declaration of the parser object that holds an input
// stream, a source tracker, a token/tokenizer pair, a tree-builder simulator,
// an XSS auditor, a preload scanner, a text decoder, and containers of
// pending tokens / preload requests / XSS infos (see the data members below).
//
// The constructor and destructor are private and no friends are declared
// here, so code outside the class cannot create or delete an instance
// directly; the static start() takes the same arguments as the constructor.
// NOTE(review): presumably start() allocates the instance and stop() tears it
// down -- both are defined outside this header, confirm there.
// NOTE(review): the class name and the *FromParserThread / *FromMainThread /
// sendTokensToMainThread names suggest the object lives off the main thread;
// the actual threading contract is not visible in this header -- confirm.
47926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)class BackgroundHTMLParser {
48926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    WTF_MAKE_FAST_ALLOCATED;
49926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)public:
    // Bundle of inputs passed (by PassOwnPtr) to start() and to the
    // constructor. Its fields have the same types as the m_options, m_parser,
    // m_xssAuditor, m_preloadScanner and m_decoder members further down.
    // NOTE(review): presumably the constructor moves these fields into the
    // matching members -- confirm in the implementation file.
50926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    struct Configuration {
51926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        HTMLParserOptions options;
52926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        WeakPtr<HTMLDocumentParser> parser;
53926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        OwnPtr<XSSAuditor> xssAuditor;
54926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        OwnPtr<TokenPreloadScanner> preloadScanner;
5509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        OwnPtr<TextResourceDecoder> decoder;
56926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    };
57926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
    // Static entry point; takes a WeakReference to a BackgroundHTMLParser and
    // an owned Configuration -- the same parameter list as the private
    // constructor.
5809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    static void start(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>);
59926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
    // Bundle of state passed (by PassOwnPtr) to resumeFrom().
    // NOTE(review): the field names suggest a snapshot of token, tokenizer,
    // tree-builder, input-stream and preload-scanner state plus text that has
    // not been parsed yet; the exact semantics are not visible here.
60926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    struct Checkpoint {
61926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        WeakPtr<HTMLDocumentParser> parser;
62926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        OwnPtr<HTMLToken> token;
63926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        OwnPtr<HTMLTokenizer> tokenizer;
64926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        HTMLTreeBuilderSimulator::State treeBuilderState;
65926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        HTMLInputCheckpoint inputCheckpoint;
66926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
67926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        String unparsedInput;
68926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    };
69926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
    // Two ways to hand over raw bytes: a raw pointer plus an int length, or a
    // PassOwnPtr to a byte vector.
    // NOTE(review): the name suffixes imply which thread each overload is
    // meant to be called from, and dataLength is a signed int (negative
    // values are presumably invalid) -- confirm against the callers.
70aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    void appendRawBytesFromParserThread(const char* data, int dataLength);
71aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch
72aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    void appendRawBytesFromMainThread(PassOwnPtr<Vector<char> >);
    // Takes a TextResourceDecoder by PassOwnPtr; presumably replaces
    // m_decoder -- confirm in the implementation file.
7309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    void setDecoder(PassOwnPtr<TextResourceDecoder>);
7409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    void flush();
75926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void resumeFrom(PassOwnPtr<Checkpoint>);
76926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void startedChunkWithCheckpoint(HTMLInputCheckpoint);
77926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void finish();
78926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void stop();
79926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
80926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void forcePlaintextForTextDocument();
81926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
82926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)private:
    // Private construction/destruction: see the note at the top of the class.
83926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>);
8409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    ~BackgroundHTMLParser();
85926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
    // Internal helpers; their bodies live outside this header.
    // appendDecodedBytes() and updateDocument() take already-decoded text as
    // a String, in contrast to the public appendRawBytes* methods which take
    // bytes.
86aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    void appendDecodedBytes(const String&);
87926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void markEndOfFile();
88926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void pumpTokenizer();
89926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    void sendTokensToMainThread();
9009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    void updateDocument(const String& decodedData);
91926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
    // Data members. Declaration order is significant: C++ constructs
    // non-static members in this order and destroys them in reverse, so
    // m_weakFactory is constructed first and destroyed last. Do not reorder
    // without checking the constructor's initializer list.
92926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    WeakPtrFactory<BackgroundHTMLParser> m_weakFactory;
93926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    BackgroundHTMLInputStream m_input;
94926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    HTMLSourceTracker m_sourceTracker;
95926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    OwnPtr<HTMLToken> m_token;
96926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    OwnPtr<HTMLTokenizer> m_tokenizer;
97926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    HTMLTreeBuilderSimulator m_treeBuilderSimulator;
98926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    HTMLParserOptions m_options;
99926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    WeakPtr<HTMLDocumentParser> m_parser;
100926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
    // Accumulated output of three kinds: compact tokens, preload requests and
    // XSS infos.
    // NOTE(review): presumably filled by pumpTokenizer() and handed off by
    // sendTokensToMainThread() -- confirm in the implementation file.
101926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    OwnPtr<CompactHTMLTokenStream> m_pendingTokens;
102926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    PreloadRequestStream m_pendingPreloads;
103926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    XSSInfoStream m_pendingXSSInfos;
104926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
    // Owned collaborators; same types as the OwnPtr fields of Configuration.
105926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    OwnPtr<XSSAuditor> m_xssAuditor;
106926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    OwnPtr<TokenPreloadScanner> m_preloadScanner;
10709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    OwnPtr<TextResourceDecoder> m_decoder;
    // NOTE(review): the name suggests the most recent encoding data reported
    // via updateDocument(); how it is compared or refreshed is not visible
    // in this header.
10809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    DocumentEncodingData m_lastSeenEncodingData;
109926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)};
110926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
111926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
112926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
113926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#endif
114