1926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)/*
2926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *
4926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Redistribution and use in source and binary forms, with or without
5926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * modification, are permitted provided that the following conditions
6926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * are met:
7926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 1. Redistributions of source code must retain the above copyright
8926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer.
9926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright
10926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    notice, this list of conditions and the following disclaimer in the
11926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *    documentation and/or other materials provided with the distribution.
12926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) *
13926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) */
25926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
26926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#include "config.h"
2753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/BackgroundHTMLParser.h"
28926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
2953e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLDocumentParser.h"
3009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#include "core/html/parser/TextResourceDecoder.h"
3153e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/XSSAuditor.h"
32591b958dee2cf159d33a0b931e6231072eaf38d5Ben Murdoch#include "wtf/MainThread.h"
33591b958dee2cf159d33a0b931e6231072eaf38d5Ben Murdoch#include "wtf/text/TextPosition.h"
34926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
35c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink {
36926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// On a network with high latency and high bandwidth, using a device
// with a fast CPU, we could end up speculatively tokenizing
// the whole document, well ahead of when the main thread actually needs it.
// This is a waste of memory (and potentially time if the speculation fails).
// So we limit our outstanding tokens arbitrarily to 10,000.
// Our maximal memory spent speculating will be approximately:
// (outstandingTokenLimit + pendingTokenLimit) * sizeof(CompactHTMLToken)
// We use a separate low and high water mark to avoid constantly topping
// off the main thread's token buffer.
// At time of writing, this is (10000 + 1000) * 28 bytes = ~308kb of memory.
// These numbers have not been tuned.
static const size_t outstandingTokenLimit = 10000;
49926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// We limit our chunks to 1000 tokens, to make sure the main
// thread is never waiting on the parser thread for tokens.
// This was tuned in https://bugs.webkit.org/show_bug.cgi?id=110408.
static const size_t pendingTokenLimit = 1000;
54926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
55926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)using namespace HTMLNames;
56926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
57197021e6b966cfb06891637935ef33fff06433d1Ben Murdoch#if ENABLE(ASSERT)
58926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
59926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)static void checkThatTokensAreSafeToSendToAnotherThread(const CompactHTMLTokenStream* tokens)
60926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
61926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    for (size_t i = 0; i < tokens->size(); ++i)
62926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        ASSERT(tokens->at(i).isSafeToSendToAnotherThread());
63926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
64926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
65926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)static void checkThatPreloadsAreSafeToSendToAnotherThread(const PreloadRequestStream& preloads)
66926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
67926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    for (size_t i = 0; i < preloads.size(); ++i)
68926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        ASSERT(preloads[i]->isSafeToSendToAnotherThread());
69926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
70926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
711e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)static void checkThatXSSInfosAreSafeToSendToAnotherThread(const XSSInfoStream& infos)
721e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles){
731e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    for (size_t i = 0; i < infos.size(); ++i)
741e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        ASSERT(infos[i]->isSafeToSendToAnotherThread());
751e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)}
761e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)
77926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#endif
78926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Creates a heap-allocated BackgroundHTMLParser from |reference| and |config|.
// The new object is not returned or stored here; it is destroyed by stop(),
// which deletes the instance.
void BackgroundHTMLParser::start(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config)
{
    new BackgroundHTMLParser(reference, config);
    // Caller must free by calling stop().
}
8409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
// Initializes the parser state from |config|:
//  - the weak factory is constructed from |reference| and this object;
//  - the tokenizer, tree builder simulator and m_options all use config->options;
//  - m_parser is copied from config->parser;
//  - the XSS auditor, preload scanner and decoder are taken from |config|
//    via release().
// A fresh HTMLToken and an empty CompactHTMLTokenStream are allocated as well.
BackgroundHTMLParser::BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config)
    : m_weakFactory(reference, this)
    , m_token(adoptPtr(new HTMLToken))
    , m_tokenizer(HTMLTokenizer::create(config->options))
    , m_treeBuilderSimulator(config->options)
    , m_options(config->options)
    , m_parser(config->parser)
    , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream))
    , m_xssAuditor(config->xssAuditor.release())
    , m_preloadScanner(config->preloadScanner.release())
    , m_decoder(config->decoder.release())
{
}
9809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
// The destructor body is empty; no explicit teardown is performed here.
BackgroundHTMLParser::~BackgroundHTMLParser()
{
}
102926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
103aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdochvoid BackgroundHTMLParser::appendRawBytesFromParserThread(const char* data, int dataLength)
104926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
105aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    ASSERT(m_decoder);
106aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    updateDocument(m_decoder->decode(data, dataLength));
107926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
108926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
109aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdochvoid BackgroundHTMLParser::appendRawBytesFromMainThread(PassOwnPtr<Vector<char> > buffer)
11009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles){
111aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    ASSERT(m_decoder);
11209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    updateDocument(m_decoder->decode(buffer->data(), buffer->size()));
11309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)}
11409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
// Appends already-decoded text to the input stream and then runs the
// tokenizer over it. The current input must not have been closed yet.
void BackgroundHTMLParser::appendDecodedBytes(const String& input)
{
    ASSERT(!m_input.current().isClosed());
    m_input.append(input);
    pumpTokenizer();
}
121aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch
// Replaces the current decoder with |decoder|, which must be non-null.
void BackgroundHTMLParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder)
{
    ASSERT(decoder);
    m_decoder = decoder;
}
12709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
12809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)void BackgroundHTMLParser::flush()
12909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles){
130aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    ASSERT(m_decoder);
13109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    updateDocument(m_decoder->flush());
13209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)}
13309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
13409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)void BackgroundHTMLParser::updateDocument(const String& decodedData)
13509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles){
13609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    DocumentEncodingData encodingData(*m_decoder.get());
13709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
13809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    if (encodingData != m_lastSeenEncodingData) {
13909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        m_lastSeenEncodingData = encodingData;
14009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
14109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        m_xssAuditor->setEncoding(encodingData.encoding());
14209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        callOnMainThread(bind(&HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser, m_parser, encodingData));
14309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    }
14409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
14509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    if (decodedData.isEmpty())
14609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        return;
14709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
148aafa69cb17c9d6606c07663ade5f81388a2c5986Ben Murdoch    appendDecodedBytes(decodedData);
14909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)}
15009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
// Restores the parser from |checkpoint|: the parser reference, token and
// tokenizer are replaced, the tree builder simulator state is reset, and both
// the input stream and the preload scanner are rewound. Tokenizing then
// resumes from the restored position.
void BackgroundHTMLParser::resumeFrom(PassOwnPtr<Checkpoint> checkpoint)
{
    m_parser = checkpoint->parser;
    m_token = checkpoint->token.release();
    m_tokenizer = checkpoint->tokenizer.release();
    m_treeBuilderSimulator.setState(checkpoint->treeBuilderState);
    m_input.rewindTo(checkpoint->inputCheckpoint, checkpoint->unparsedInput);
    m_preloadScanner->rewindTo(checkpoint->preloadScannerCheckpoint);
    pumpTokenizer();
}
161926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Invalidates the input checkpoints before |inputCheckpoint| and then pumps
// the tokenizer again.
void BackgroundHTMLParser::startedChunkWithCheckpoint(HTMLInputCheckpoint inputCheckpoint)
{
    // Note, we should not have to worry about the index being invalid
    // as messages from the main thread will be processed in FIFO order.
    m_input.invalidateCheckpointsBefore(inputCheckpoint);
    pumpTokenizer();
}
169926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Marks the end of the input (appending the end-of-file marker and closing
// the stream) and pumps the tokenizer over what remains.
void BackgroundHTMLParser::finish()
{
    markEndOfFile();
    pumpTokenizer();
}
175926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Destroys this parser. The object must not be used after this call returns.
void BackgroundHTMLParser::stop()
{
    delete this;
}
180926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Switches the tokenizer directly into the PLAINTEXT state.
void BackgroundHTMLParser::forcePlaintextForTextDocument()
{
    // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser)
    // to force us into the PLAINTEXT state w/o using a <plaintext> tag.
    // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons.
    m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
}
188926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
189926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)void BackgroundHTMLParser::markEndOfFile()
190926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
191926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(!m_input.current().isClosed());
192926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    m_input.append(String(&kEndOfFileMarker, 1));
193926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    m_input.close();
194926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
195926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
196926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)void BackgroundHTMLParser::pumpTokenizer()
197926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
198926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // No need to start speculating until the main thread has almost caught up.
199a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)    if (m_input.totalCheckpointTokenCount() > outstandingTokenLimit)
200926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        return;
201926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
202926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    while (true) {
203926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        m_sourceTracker.start(m_input.current(), m_tokenizer.get(), *m_token);
204e52495584422c5edb5b2944981473a2e208da323Torne (Richard Coles)        if (!m_tokenizer->nextToken(m_input.current(), *m_token)) {
205926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            // We've reached the end of our current input.
206926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            sendTokensToMainThread();
207926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            break;
208926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
209926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        m_sourceTracker.end(m_input.current(), m_tokenizer.get(), *m_token);
210926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
211926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        {
212926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            TextPosition position = TextPosition(m_input.current().currentLine(), m_input.current().currentColumn());
213926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
214926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor->filterToken(FilterTokenRequest(*m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) {
215926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                xssInfo->m_textPosition = position;
216926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                m_pendingXSSInfos.append(xssInfo.release());
217926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            }
218926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
219926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            CompactHTMLToken token(m_token.get(), TextPosition(m_input.current().currentLine(), m_input.current().currentColumn()));
220926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
2215267f701546148b83dfbe1d151cb184385bb5c22Torne (Richard Coles)            m_preloadScanner->scan(token, m_input.current(), m_pendingPreloads);
222926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
223926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            m_pendingTokens->append(token);
224926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
225926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
226926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        m_token->clear();
227926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
228926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (!m_treeBuilderSimulator.simulate(m_pendingTokens->last(), m_tokenizer.get()) || m_pendingTokens->size() >= pendingTokenLimit) {
229926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            sendTokensToMainThread();
230926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            // If we're far ahead of the main thread, yield for a bit to avoid consuming too much memory.
231a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)            if (m_input.totalCheckpointTokenCount() > outstandingTokenLimit)
232926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                break;
233926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        }
234926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
235926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
236926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Packages the pending tokens, preloads and XSS infos into a ParsedChunk,
// together with the current tokenizer and tree builder simulator state and
// newly created input / preload scanner checkpoints, and passes the chunk to
// HTMLDocumentParser via callOnMainThread(). Does nothing when there are no
// pending tokens.
void BackgroundHTMLParser::sendTokensToMainThread()
{
    if (m_pendingTokens->isEmpty())
        return;

#if ENABLE(ASSERT)
    checkThatTokensAreSafeToSendToAnotherThread(m_pendingTokens.get());
    checkThatPreloadsAreSafeToSendToAnotherThread(m_pendingPreloads);
    checkThatXSSInfosAreSafeToSendToAnotherThread(m_pendingXSSInfos);
#endif

    OwnPtr<HTMLDocumentParser::ParsedChunk> chunk = adoptPtr(new HTMLDocumentParser::ParsedChunk);
    // Swapping moves the pending preloads and XSS infos into the chunk and
    // leaves the members holding the chunk's previous contents.
    chunk->preloads.swap(m_pendingPreloads);
    chunk->xssInfos.swap(m_pendingXSSInfos);
    chunk->tokenizerState = m_tokenizer->state();
    chunk->treeBuilderState = m_treeBuilderSimulator.state();
    // The checkpoint is created with the pending token count, so this must
    // happen before m_pendingTokens is released below.
    chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size());
    chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint();
    chunk->tokens = m_pendingTokens.release();
    callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, m_parser, chunk.release()));

    // Start a fresh, empty token stream for the next batch.
    m_pendingTokens = adoptPtr(new CompactHTMLTokenStream);
}
260926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
261926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
262