15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/*
25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2010 Google, Inc. All Rights Reserved.
35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Redistribution and use in source and binary forms, with or without
55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modification, are permitted provided that the following conditions
65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * are met:
75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 1. Redistributions of source code must retain the above copyright
85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *    notice, this list of conditions and the following disclaimer.
95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright
105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *    notice, this list of conditions and the following disclaimer in the
115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *    documentation and/or other materials provided with the distribution.
125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */
255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "config.h"
2753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLViewSourceParser.h"
285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "core/dom/DOMImplementation.h"
3009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#include "core/html/parser/HTMLParserIdioms.h"
3153e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLParserOptions.h"
3293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "core/html/parser/HTMLToken.h"
33f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles)#include "core/html/parser/XSSAuditorDelegate.h"
345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
35c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink {
365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
37d6cdb82654e8f3343a693ca752d5c4cee0324e17Torne (Richard Coles)HTMLViewSourceParser::HTMLViewSourceParser(HTMLViewSourceDocument& document, const String& mimeType)
385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    : DecodedDataDocumentParser(document)
39d6cdb82654e8f3343a693ca752d5c4cee0324e17Torne (Richard Coles)    , m_tokenizer(HTMLTokenizer::create(HTMLParserOptions(&document)))
405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
4193ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)    if (mimeType != "text/html" && !DOMImplementation::isXMLMIMEType(mimeType))
4293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)        m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void HTMLViewSourceParser::pumpTokenizer()
465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
47f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles)    m_xssAuditor.init(document(), 0);
48f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles)
495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (true) {
50926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        m_sourceTracker.start(m_input.current(), m_tokenizer.get(), m_token);
515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (!m_tokenizer->nextToken(m_input.current(), m_token))
5293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)            return;
53926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        m_sourceTracker.end(m_input.current(), m_tokenizer.get(), m_token);
545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
55f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles)        OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA()));
56f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles)        HTMLViewSourceDocument::SourceAnnotation annotation = xssInfo ? HTMLViewSourceDocument::AnnotateSourceAsXSS : HTMLViewSourceDocument::AnnotateSourceAsSafe;
57f6b7aed3f7ce69aca0d7a032d144cbd088b04393Torne (Richard Coles)        document()->addSource(m_sourceTracker.sourceForToken(m_token), m_token, annotation);
5893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)
5993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)        // FIXME: The tokenizer should do this work for us.
6093ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)        if (m_token.type() == HTMLToken::StartTag)
6109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit));
625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_token.clear();
635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
66926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)void HTMLViewSourceParser::append(PassRefPtr<StringImpl> input)
675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
68926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    m_input.appendToEnd(String(input));
695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    pumpTokenizer();
705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void HTMLViewSourceParser::finish()
735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!m_input.haveSeenEndOfFile())
755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_input.markEndOfFile();
765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    pumpTokenizer();
775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    document()->finishedParsing();
785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
81