15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/* 25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2010 Google, Inc. All Rights Reserved. 35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Redistribution and use in source and binary forms, with or without 55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modification, are permitted provided that the following conditions 65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * are met: 75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 1. Redistributions of source code must retain the above copyright 85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * notice, this list of conditions and the following disclaimer. 95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright 105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * notice, this list of conditions and the following disclaimer in the 115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * documentation and/or other materials provided with the distribution. 125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */ 255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "config.h" 2753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLViewSourceParser.h" 285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 2993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "core/dom/DOMImplementation.h" 3009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#include "core/html/parser/HTMLParserIdioms.h" 3153e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLParserOptions.h" 3293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "core/html/parser/HTMLToken.h" 33f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles)#include "core/html/parser/XSSAuditorDelegate.h" 345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 35c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink { 365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 37d6cdb82654e8f3343a693ca752d5c4cee0324e17Torne (Richard Coles)HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument& document, const String& mimeType) 385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) : DecodedDataDocumentParser(document) 39d6cdb82654e8f3343a693ca752d5c4cee0324e17Torne (Richard Coles) , m_tokenizer(HTMLTokenizer::create(HTMLParserOptions(&document))) 405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 4193ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) if (mimeType != "text/html" && !DOMImplementation::isXMLMIMEType(mimeType)) 4293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void HTMLViewSourceParser::pumpTokenizer() 465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 47f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles) m_xssAuditor.init(document(), 0); 48f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles) 495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) while (true) { 50926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_sourceTracker.start(m_input.current(), m_tokenizer.get(), m_token); 515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (!m_tokenizer->nextToken(m_input.current(), m_token)) 5293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) return; 53926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_sourceTracker.end(m_input.current(), m_tokenizer.get(), m_token); 545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 55f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles) OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA())); 56f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles) HTMLViewSourceDocument::SourceAnnotation annotation = xssInfo ? HTMLViewSourceDocument::AnnotateSourceAsXSS : HTMLViewSourceDocument::AnnotateSourceAsSafe; 57f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles) document()->addSource(m_sourceTracker.sourceForToken(m_token), m_token, annotation); 5893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) 5993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) // FIXME: The tokenizer should do this work for us. 6093ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) if (m_token.type() == HTMLToken::StartTag) 6109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles) m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit)); 625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) m_token.clear(); 635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 66926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)void HTMLViewSourceParser::append(PassRefPtr<StringImpl> input) 675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 68926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) m_input.appendToEnd(String(input)); 695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) pumpTokenizer(); 705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void HTMLViewSourceParser::finish() 735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (!m_input.haveSeenEndOfFile()) 755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) m_input.markEndOfFile(); 765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) pumpTokenizer(); 775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) document()->finishedParsing(); 785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 81