1/* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "config.h" 27#include "core/html/parser/BackgroundHTMLParser.h" 28 29#include "core/html/parser/HTMLDocumentParser.h" 30#include "core/html/parser/HTMLParserThread.h" 31#include "core/html/parser/HTMLTokenizer.h" 32#include "core/html/parser/XSSAuditor.h" 33#include "wtf/MainThread.h" 34#include "wtf/text/TextPosition.h" 35 36namespace WebCore { 37 38// On a network with high latency and high bandwidth, using a device 39// with a fast CPU, we could end up speculatively tokenizing 40// the whole document, well ahead of when the main-thread actually needs it. 41// This is a waste of memory (and potentially time if the speculation fails). 42// So we limit our outstanding speculations arbitrarily to 10. 43// Our maximal memory spent speculating will be approximately: 44// outstandingCheckpointLimit * pendingTokenLimit * sizeof(CompactToken) 45// We use a separate low and high water mark to avoid constantly topping 46// off the main thread's token buffer. 47// At time of writing, this is 10 * 1000 * 28 bytes = appox 280kb of memory. 48// These numbers have not been tuned. 49static const size_t outstandingCheckpointLimit = 10; 50 51// We limit our chucks to 1000 tokens, to make sure the main 52// thread is never waiting on the parser thread for tokens. 53// This was tuned in https://bugs.webkit.org/show_bug.cgi?id=110408. 54static const size_t pendingTokenLimit = 1000; 55 56using namespace HTMLNames; 57 58#ifndef NDEBUG 59 60static void checkThatTokensAreSafeToSendToAnotherThread(const CompactHTMLTokenStream* tokens) 61{ 62 for (size_t i = 0; i < tokens->size(); ++i) 63 ASSERT(tokens->at(i).isSafeToSendToAnotherThread()); 64} 65 66static void checkThatPreloadsAreSafeToSendToAnotherThread(const PreloadRequestStream& preloads) 67{ 68 for (size_t i = 0; i < preloads.size(); ++i) 69 ASSERT(preloads[i]->isSafeToSendToAnotherThread()); 70} 71 72#endif 73 74BackgroundHTMLParser::BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config) 75 : m_weakFactory(reference, this) 76 , m_token(adoptPtr(new HTMLToken)) 77 , m_tokenizer(HTMLTokenizer::create(config->options)) 78 , m_treeBuilderSimulator(config->options) 79 , m_options(config->options) 80 , m_parser(config->parser) 81 , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream)) 82 , m_xssAuditor(config->xssAuditor.release()) 83 , m_preloadScanner(config->preloadScanner.release()) 84{ 85} 86 87void BackgroundHTMLParser::append(const String& input) 88{ 89 ASSERT(!m_input.current().isClosed()); 90 m_input.append(input); 91 pumpTokenizer(); 92} 93 94void BackgroundHTMLParser::resumeFrom(PassOwnPtr<Checkpoint> checkpoint) 95{ 96 m_parser = checkpoint->parser; 97 m_token = checkpoint->token.release(); 98 m_tokenizer = checkpoint->tokenizer.release(); 99 m_treeBuilderSimulator.setState(checkpoint->treeBuilderState); 100 m_input.rewindTo(checkpoint->inputCheckpoint, checkpoint->unparsedInput); 101 m_preloadScanner->rewindTo(checkpoint->preloadScannerCheckpoint); 102 pumpTokenizer(); 103} 104 105void BackgroundHTMLParser::startedChunkWithCheckpoint(HTMLInputCheckpoint inputCheckpoint) 106{ 107 // Note, we should not have to worry about the index being invalid 108 // as messages from the main thread will be processed in FIFO order. 109 m_input.invalidateCheckpointsBefore(inputCheckpoint); 110 pumpTokenizer(); 111} 112 113void BackgroundHTMLParser::finish() 114{ 115 markEndOfFile(); 116 pumpTokenizer(); 117} 118 119void BackgroundHTMLParser::stop() 120{ 121 delete this; 122} 123 124void BackgroundHTMLParser::forcePlaintextForTextDocument() 125{ 126 // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser) 127 // to force us into the PLAINTEXT state w/o using a <plaintext> tag. 128 // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons. 129 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 130} 131 132void BackgroundHTMLParser::markEndOfFile() 133{ 134 ASSERT(!m_input.current().isClosed()); 135 m_input.append(String(&kEndOfFileMarker, 1)); 136 m_input.close(); 137} 138 139void BackgroundHTMLParser::pumpTokenizer() 140{ 141 // No need to start speculating until the main thread has almost caught up. 142 if (m_input.outstandingCheckpointCount() > outstandingCheckpointLimit) 143 return; 144 145 while (true) { 146 m_sourceTracker.start(m_input.current(), m_tokenizer.get(), *m_token); 147 if (!m_tokenizer->nextToken(m_input.current(), *m_token)) { 148 // We've reached the end of our current input. 149 sendTokensToMainThread(); 150 break; 151 } 152 m_sourceTracker.end(m_input.current(), m_tokenizer.get(), *m_token); 153 154 { 155 TextPosition position = TextPosition(m_input.current().currentLine(), m_input.current().currentColumn()); 156 157 if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor->filterToken(FilterTokenRequest(*m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) { 158 xssInfo->m_textPosition = position; 159 m_pendingXSSInfos.append(xssInfo.release()); 160 } 161 162 CompactHTMLToken token(m_token.get(), TextPosition(m_input.current().currentLine(), m_input.current().currentColumn())); 163 164 m_preloadScanner->scan(token, m_input.current(), m_pendingPreloads); 165 166 m_pendingTokens->append(token); 167 } 168 169 m_token->clear(); 170 171 if (!m_treeBuilderSimulator.simulate(m_pendingTokens->last(), m_tokenizer.get()) || m_pendingTokens->size() >= pendingTokenLimit) { 172 sendTokensToMainThread(); 173 // If we're far ahead of the main thread, yield for a bit to avoid consuming too much memory. 174 if (m_input.outstandingCheckpointCount() > outstandingCheckpointLimit) 175 break; 176 } 177 } 178} 179 180void BackgroundHTMLParser::sendTokensToMainThread() 181{ 182 if (m_pendingTokens->isEmpty()) 183 return; 184 185#ifndef NDEBUG 186 checkThatTokensAreSafeToSendToAnotherThread(m_pendingTokens.get()); 187 checkThatPreloadsAreSafeToSendToAnotherThread(m_pendingPreloads); 188#endif 189 190 OwnPtr<HTMLDocumentParser::ParsedChunk> chunk = adoptPtr(new HTMLDocumentParser::ParsedChunk); 191 chunk->tokens = m_pendingTokens.release(); 192 chunk->preloads.swap(m_pendingPreloads); 193 chunk->xssInfos.swap(m_pendingXSSInfos); 194 chunk->tokenizerState = m_tokenizer->state(); 195 chunk->treeBuilderState = m_treeBuilderSimulator.state(); 196 chunk->inputCheckpoint = m_input.createCheckpoint(); 197 chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint(); 198 callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, m_parser, chunk.release())); 199 200 m_pendingTokens = adoptPtr(new CompactHTMLTokenStream); 201} 202 203} 204