/*
 * Copyright (C) 2011 Adam Barth. All Rights Reserved.
 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "config.h"
2853e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/XSSAuditor.h"
2953e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)
305d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/HTMLNames.h"
315d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/SVGNames.h"
325d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)#include "core/XLinkNames.h"
3353e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/dom/Document.h"
34d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)#include "core/frame/LocalFrame.h"
35e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)#include "core/frame/Settings.h"
36d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)#include "core/frame/csp/ContentSecurityPolicy.h"
3753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/HTMLParamElement.h"
3853e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLDocumentParser.h"
3953e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLParserIdioms.h"
4009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#include "core/html/parser/TextResourceDecoder.h"
4153e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/XSSAuditorDelegate.h"
42e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)#include "core/inspector/ConsoleMessage.h"
4353e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/loader/DocumentLoader.h"
441e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)#include "platform/JSONValues.h"
45bfe3590b1806e3ff18f46ee3af5d4b83078f305aTorne (Richard Coles)#include "platform/network/FormData.h"
461e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)#include "platform/text/DecodeEscapeSequences.h"
4707a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch#include "wtf/ASCIICType.h"
4881a5157921f1d2a7ff6aae115bfe3c139b38a5c8Torne (Richard Coles)#include "wtf/MainThread.h"
495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
namespace {

// A literal URL used where a URL with a unique security origin is needed.
// SecurityOrigin::urlWithUniqueSecurityOrigin() would be the natural choice,
// but it can't be used cross-thread, so the string is spelled out here.
const char kURLWithUniqueOrigin[] = "data:,";

} // namespace
5609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
57c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink {
585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)using namespace HTMLNames;
605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool isNonCanonicalCharacter(UChar c)
625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // We remove all non-ASCII characters, including non-printable ASCII characters.
645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    //
655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // adverse effect that we remove any legitimate zeros from a string.
68c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    //
69d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg,
70d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    // a//b becomes a/b.
715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    //
72c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    // We also remove the questionmark character, since some severs replace invalid high-bytes with a questionmark. We
73c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    // are already stripping the high-bytes so we also strip the questionmark to match.
74c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    //
75c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    // For instance: new String("http://localhost:8000?x") => new String("http:localhost:8x").
76c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)    return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' || c >= 127);
775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool isRequiredForInjection(UChar c)
805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return (c == '\'' || c == '"' || c == '<' || c == '>');
825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool isTerminatingCharacter(UChar c)
855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
86926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || c == '>' || c == ',');
875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool isHTMLQuote(UChar c)
905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return (c == '"' || c == '\'');
925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool isJSNewline(UChar c)
955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Per ecma-262 section 7.3 Line Terminators.
975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029);
985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool startsHTMLCommentAt(const String& string, size_t start)
1015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
10207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch    return (start + 3 < string.length() && string[start] == '<' && string[start + 1] == '!' && string[start + 2] == '-' && string[start + 3] == '-');
1035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool startsSingleLineCommentAt(const String& string, size_t start)
1065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
10707a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch    return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '/');
1085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool startsMultiLineCommentAt(const String& string, size_t start)
1115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
11207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch    return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '*');
11307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch}
11407a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch
11507a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdochstatic bool startsOpeningScriptTagAt(const String& string, size_t start)
11607a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch{
11707a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch    return start + 6 < string.length() && string[start] == '<'
11807a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        && WTF::toASCIILowerUnchecked(string[start + 1]) == 's'
11907a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        && WTF::toASCIILowerUnchecked(string[start + 2]) == 'c'
12007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        && WTF::toASCIILowerUnchecked(string[start + 3]) == 'r'
12107a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        && WTF::toASCIILowerUnchecked(string[start + 4]) == 'i'
12207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        && WTF::toASCIILowerUnchecked(string[start + 5]) == 'p'
12307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        && WTF::toASCIILowerUnchecked(string[start + 6]) == 't';
1245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
12653e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)// If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h
127926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)template<size_t inlineCapacity>
128926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, const QualifiedName& qname)
129926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
130926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return equalIgnoringNullity(vector, qname.localName().impl());
131926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
132926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
1335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool hasName(const HTMLToken& token, const QualifiedName& name)
1345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
135926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return threadSafeMatch(token.name(), name);
1365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
1395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
140926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    // Notice that we're careful not to ref the StringImpl here because we might be on a background thread.
141926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI ? "xlink:" + name.localName().string() : name.localName().string();
142926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
1435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    for (size_t i = 0; i < token.attributes().size(); ++i) {
144926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (equalIgnoringNullity(token.attributes().at(i).name, attrName)) {
1455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            indexOfMatchingAttribute = i;
1465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            return true;
1475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
1485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return false;
1505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
1535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
1555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (name.size() < lengthOfShortestInlineEventHandlerName)
1565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return false;
1575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return name[0] == 'o' && name[1] == 'n';
1585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static bool isDangerousHTTPEquiv(const String& value)
1615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    String equiv = value.stripWhiteSpace();
1635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
1645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static inline String decode16BitUnicodeEscapeSequences(const String& string)
1675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.
1695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, UTF8Encoding());
1705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
17281a5157921f1d2a7ff6aae115bfe3c139b38a5c8Torne (Richard Coles)static inline String decodeStandardURLEscapeSequences(const String& string, const WTF::TextEncoding& encoding)
1735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
174521d96ec04ace82590870fb04353ec4f82bb150fTorne (Richard Coles)    // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in weborigin/KURL.h) to
1755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // avoid platform-specific URL decoding differences (e.g. KURLGoogle).
1765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return decodeEscapeSequences<URLEscapeSequence>(string, encoding);
1775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
17981a5157921f1d2a7ff6aae115bfe3c139b38a5c8Torne (Richard Coles)static String fullyDecodeString(const String& string, const WTF::TextEncoding& encoding)
1805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t oldWorkingStringLength;
1825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    String workingString = string;
1835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    do {
1845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        oldWorkingStringLength = workingString.length();
1855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding));
1865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    } while (workingString.length() < oldWorkingStringLength);
1875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    workingString.replace('+', ' ');
1885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return workingString;
1895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1915d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)static void truncateForSrcLikeAttribute(String& decodedSnippet)
1925d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles){
1935d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // In HTTP URLs, characters following the first ?, #, or third slash may come from
1945d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // the page itself and can be merely ignored by an attacker's server when a remote
1955d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // script or script-like resource is requested. In DATA URLS, the payload starts at
1965d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters
1975d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // following this may come from the page itself and may be ignored when the script is
1985d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // executed. For simplicity, we don't differentiate based on URL scheme, and stop at
1995d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // the first # or ?, the third slash, or the first slash or < once a comma is seen.
2005d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    int slashCount = 0;
2015d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    bool commaSeen = false;
2025d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) {
2035d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        UChar currentChar = decodedSnippet[currentLength];
2045d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (currentChar == '?'
2055d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            || currentChar == '#'
2065d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2))
2075d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            || (currentChar == '<' && commaSeen)) {
2085d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            decodedSnippet.truncate(currentLength);
2095d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            return;
2105d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        }
2115d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (currentChar == ',')
2125d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            commaSeen = true;
2135d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    }
2145d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)}
2155d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
2165d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)static void truncateForScriptLikeAttribute(String& decodedSnippet)
2175d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles){
2185d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // Beware of trailing characters which came from the page itself, not the
2195d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // injected vector. Excluding the terminating character covers common cases
2205d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // where the page immediately ends the attribute, but doesn't cover more
2215d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // complex cases where there is other page data following the injection.
2225d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // Generally, these won't parse as javascript, so the injected vector
2235d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // typically excludes them from consideration via a single-line comment or
2245d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // by enclosing them in a string literal terminated later by the page's own
2255d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // closing punctuation. Since the snippet has not been parsed, the vector
2265d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // may also try to introduce these via entities. As a result, we'd like to
2275d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // stop before the first "//", the first <!--, the first entity, or the first
2285d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // quote not immediately following the first equals sign (taking whitespace
2295d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // into consideration). To keep things simpler, we don't try to distinguish
2305d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // between entity-introducing amperands vs. other uses, nor do we bother to
2315d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // check for a second slash for a comment, nor do we bother to check for
2325d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // !-- following a less-than sign. We stop instead on any ampersand
2335d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // slash, or less-than sign.
2345d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    size_t position = 0;
2355d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if ((position = decodedSnippet.find("=")) != kNotFound
2365d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound
2375d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) {
2385d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        decodedSnippet.truncate(position);
2395d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    }
2405d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)}
2415d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
2421e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)static ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ReflectedXSSDisposition xssProtection, ReflectedXSSDisposition reflectedXSS)
243926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
2441e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS);
245926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
2461e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    if (result == ReflectedXSSInvalid || result == FilterReflectedXSS || result == ReflectedXSSUnset)
2471e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        return FilterReflectedXSS;
248926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
249926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return result;
250926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
251926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
252926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute)
253926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
254926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return threadSafeMatch(attribute.name, SVGNames::valuesAttr);
255926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
256926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
2575d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)static String semicolonSeparatedValueContainingJavaScriptURL(const String& value)
258926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
259926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    Vector<String> valueList;
260926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    value.split(';', valueList);
261926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    for (size_t i = 0; i < valueList.size(); ++i) {
2625d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        String stripped = stripLeadingAndTrailingHTMLSpaces(valueList[i]);
2635d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (protocolIsJavaScript(stripped))
2645d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            return stripped;
265926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
2665d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return emptyString();
267926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
268926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
269926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)XSSAuditor::XSSAuditor()
270926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    : m_isEnabled(false)
2711e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    , m_xssProtection(FilterReflectedXSS)
272926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    , m_didSendValidCSPHeader(false)
273926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    , m_didSendValidXSSProtectionHeader(false)
2745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_state(Uninitialized)
2750019e4eead4d990e4304c54a9028aca9122fb256Ben Murdoch    , m_scriptTagFoundInRequest(false)
2765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_scriptTagNestingLevel(0)
277926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    , m_encoding(UTF8Encoding())
2785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
2795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Although tempting to call init() at this point, the various objects
2805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // we want to reference might not all have been constructed yet.
2815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
2825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
28353e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)void XSSAuditor::initForFragment()
28453e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles){
28553e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)    ASSERT(isMainThread());
28653e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)    ASSERT(m_state == Uninitialized);
2871e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    m_state = FilteringTokens;
28853e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)    // When parsing a fragment, we don't enable the XSS auditor because it's
28953e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)    // too much overhead.
29053e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)    ASSERT(!m_isEnabled);
29153e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)}
29253e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)
293926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate)
2945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
295926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(isMainThread());
2961e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    if (m_state != Uninitialized)
297926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        return;
2981e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    m_state = FilteringTokens;
2995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
300e1f1df5f01594c0e62e751e4b46e779b85c2faa5Torne (Richard Coles)    if (Settings* settings = document->settings())
301e1f1df5f01594c0e62e751e4b46e779b85c2faa5Torne (Richard Coles)        m_isEnabled = settings->xssAuditorEnabled();
302926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
3035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!m_isEnabled)
3045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
3055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
306926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    m_documentURL = document->url().copy();
307926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
308d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)    // In theory, the Document could have detached from the LocalFrame after the
3095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // XSSAuditor was constructed.
310926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (!document->frame()) {
3115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_isEnabled = false;
3125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
3135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
3145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
315926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (m_documentURL.isEmpty()) {
3165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // The URL can be empty when opening a new browser window or calling window.open("").
3175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_isEnabled = false;
3185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
3195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
3205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
321926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (m_documentURL.protocolIsData()) {
3225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_isEnabled = false;
3235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
3245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
3255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
326c0e19a689c8ac22cdc96b291a8d33a5d3b0b34a4Torne (Richard Coles)    if (document->encoding().isValid())
327c0e19a689c8ac22cdc96b291a8d33a5d3b0b34a4Torne (Richard Coles)        m_encoding = document->encoding();
328926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
329f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles)    if (DocumentLoader* documentLoader = document->frame()->loader().documentLoader()) {
330a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        DEFINE_STATIC_LOCAL(const AtomicString, XSSProtectionHeader, ("X-XSS-Protection", AtomicString::ConstructFromLiteral));
331a854de003a23bf3c7f95ec0f8154ada64092ff5cTorne (Richard Coles)        const AtomicString& headerValue = documentLoader->response().httpHeaderField(XSSProtectionHeader);
3325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        String errorDetails;
3335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        unsigned errorPosition = 0;
3345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        String reportURL;
335926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        KURL xssProtectionReportURL;
336926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
337926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        // Process the X-XSS-Protection header, then mix in the CSP header's value.
3381e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        ReflectedXSSDisposition xssProtectionHeader = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, reportURL);
3391e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        m_didSendValidXSSProtectionHeader = xssProtectionHeader != ReflectedXSSUnset && xssProtectionHeader != ReflectedXSSInvalid;
3401e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        if ((xssProtectionHeader == FilterReflectedXSS || xssProtectionHeader == BlockReflectedXSS) && !reportURL.isEmpty()) {
341926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            xssProtectionReportURL = document->completeURL(reportURL);
342926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            if (MixedContentChecker::isMixedContent(document->securityOrigin(), xssProtectionReportURL)) {
3435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                errorDetails = "insecure reporting URL for secure page";
3441e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)                xssProtectionHeader = ReflectedXSSInvalid;
345926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                xssProtectionReportURL = KURL();
3465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            }
3475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
3481e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        if (xssProtectionHeader == ReflectedXSSInvalid)
349e38fbeeb576b5094e34e038ab88d9d6a5c5c2214Torne (Richard Coles)            document->addConsoleMessage(ConsoleMessage::create(SecurityMessageSource, ErrorMessageLevel, "Error parsing header X-XSS-Protection: " + headerValue + ": "  + errorDetails + " at character position " + String::format("%u", errorPosition) + ". The default protections will be applied."));
3505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
3511e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        ReflectedXSSDisposition cspHeader = document->contentSecurityPolicy()->reflectedXSSDisposition();
3521e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        m_didSendValidCSPHeader = cspHeader != ReflectedXSSUnset && cspHeader != ReflectedXSSInvalid;
3535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
354926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        m_xssProtection = combineXSSProtectionHeaderAndCSP(xssProtectionHeader, cspHeader);
355926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        // FIXME: Combine the two report URLs in some reasonable way.
356926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (auditorDelegate)
357926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            auditorDelegate->setReportURL(xssProtectionReportURL.copy());
35809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
35909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        FormData* httpBody = documentLoader->request().httpBody();
36009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        if (httpBody && !httpBody->isEmpty())
36109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            m_httpBodyAsString = httpBody->flattenToString();
3625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
3635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
36409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    setEncoding(m_encoding);
36509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)}
36609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
36709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)void XSSAuditor::setEncoding(const WTF::TextEncoding& encoding)
36809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles){
36909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
37009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    const int suffixTreeDepth = 5;
37109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
37209380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    if (!encoding.isValid())
3735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
37409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
37509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    m_encoding = encoding;
37609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
3775d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    m_decodedURL = canonicalize(m_documentURL.string(), NoTruncation);
37809380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    if (m_decodedURL.find(isRequiredForInjection) == kNotFound)
37909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        m_decodedURL = String();
38009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
38109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    if (!m_httpBodyAsString.isEmpty()) {
3825d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        m_decodedHTTPBody = canonicalize(m_httpBodyAsString, NoTruncation);
38309380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        m_httpBodyAsString = String();
38409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        if (m_decodedHTTPBody.find(isRequiredForInjection) == kNotFound)
38509380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            m_decodedHTTPBody = String();
38609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)            if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree)
38709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)                m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
3885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
38909380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)
39009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
39109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        m_isEnabled = false;
3925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
3935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
394926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)PassOwnPtr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request)
3955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
3961e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    ASSERT(m_state != Uninitialized);
3971e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    if (!m_isEnabled || m_xssProtection == AllowReflectedXSS)
398926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        return nullptr;
3995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool didBlockScript = false;
401926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (request.token.type() == HTMLToken::StartTag)
402926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript = filterStartToken(request);
4035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    else if (m_scriptTagNestingLevel) {
404926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (request.token.type() == HTMLToken::Character)
405926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            didBlockScript = filterCharacterToken(request);
406926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        else if (request.token.type() == HTMLToken::EndTag)
407926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            filterEndToken(request);
4085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
4095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (didBlockScript) {
4111e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        bool didBlockEntirePage = (m_xssProtection == BlockReflectedXSS);
4121e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        OwnPtr<XSSInfo> xssInfo = XSSInfo::create(m_documentURL, didBlockEntirePage, m_didSendValidXSSProtectionHeader, m_didSendValidCSPHeader);
413926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        return xssInfo.release();
4145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
415926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return nullptr;
4165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
418926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterStartToken(const FilterTokenRequest& request)
4195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
4201e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    m_state = FilteringTokens;
421926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    bool didBlockScript = eraseDangerousAttributesIfInjected(request);
4225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
423926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (hasName(request.token, scriptTag)) {
424926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterScriptToken(request);
425926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
4265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_scriptTagNestingLevel++;
427926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    } else if (hasName(request.token, objectTag))
428926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterObjectToken(request);
429926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, paramTag))
430926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterParamToken(request);
431926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, embedTag))
432926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterEmbedToken(request);
433926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, appletTag))
434926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterAppletToken(request);
4359bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    else if (hasName(request.token, iframeTag) || hasName(request.token, frameTag))
4369bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)        didBlockScript |= filterFrameToken(request);
437926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, metaTag))
438926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterMetaToken(request);
439926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, baseTag))
440926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterBaseToken(request);
441926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, formTag))
442926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterFormToken(request);
443926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, inputTag))
444926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterInputToken(request);
445926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    else if (hasName(request.token, buttonTag))
446926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= filterButtonToken(request);
4475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return didBlockScript;
4495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
451926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)void XSSAuditor::filterEndToken(const FilterTokenRequest& request)
4525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
4535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(m_scriptTagNestingLevel);
4541e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    m_state = FilteringTokens;
455926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (hasName(request.token, scriptTag)) {
4565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_scriptTagNestingLevel--;
457926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
4585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
4595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
461926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request)
4625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
4635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(m_scriptTagNestingLevel);
4641e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    ASSERT(m_state != Uninitialized);
4651e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    if (m_state == PermittingAdjacentCharacterTokens)
4661e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        return false;
4671e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)
4681e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    if ((m_state == SuppressingAdjacentCharacterTokens)
4695d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        || (m_scriptTagFoundInRequest && isContainedInRequest(canonicalizedSnippetForJavaScript(request)))) {
470926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        request.token.eraseCharacters();
471926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        request.token.appendToCharacter(' '); // Technically, character tokens can't be empty.
4721e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)        m_state = SuppressingAdjacentCharacterTokens;
4735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return true;
4745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
4751e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)
4761e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)    m_state = PermittingAdjacentCharacterTokens;
4775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return false;
4785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
480926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request)
4815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
482926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
483926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, scriptTag));
4845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
485926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    bool didBlockScript = false;
4865d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    m_scriptTagFoundInRequest = isContainedInRequest(canonicalizedSnippetForTagName(request));
4870019e4eead4d990e4304c54a9028aca9122fb256Ben Murdoch    if (m_scriptTagFoundInRequest) {
4885d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);
4895d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttributeTruncation);
490926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
491926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return didBlockScript;
4925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
4935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
494926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request)
4955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
496926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
497926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, objectTag));
4985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
4995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool didBlockScript = false;
5005d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
5015d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttributeTruncation);
502926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
503926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, classidAttr);
5045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
5055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return didBlockScript;
5065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
508926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterParamToken(const FilterTokenRequest& request)
5095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
510926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
511926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, paramTag));
5125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t indexOfNameAttribute;
514926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (!findAttributeWithName(request.token, nameAttr, indexOfNameAttribute))
5155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return false;
5165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
517926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    const HTMLToken::Attribute& nameAttribute = request.token.attributes().at(indexOfNameAttribute);
518926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (!HTMLParamElement::isURLParameter(String(nameAttribute.value)))
5195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return false;
5205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5215d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttributeTruncation);
5225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
524926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request)
5255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
526926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
527926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, embedTag));
5285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool didBlockScript = false;
5305d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
5315d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);
5325d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);
533926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
5345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
5355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return didBlockScript;
5365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
538926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request)
5395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
540926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
541926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, appletTag));
5425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool didBlockScript = false;
5445d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
5455d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);
546926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, objectAttr);
5475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
5485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return didBlockScript;
5495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5519bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request)
5525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
553926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
5549bbd2f5e390b01907d97ecffde80aa1b06113aacTorne (Richard Coles)    ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag));
5555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
5565d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttributeTruncation);
5575d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (isContainedInRequest(canonicalizedSnippetForTagName(request)))
5585d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttributeTruncation);
559f79f16f17ddc4f842d7b7a38603e280e94be826aTorne (Richard Coles)
5605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return didBlockScript;
5615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
563926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterMetaToken(const FilterTokenRequest& request)
5645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
565926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
566926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, metaTag));
5675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
568926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return eraseAttributeIfInjected(request, http_equivAttr);
5695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
571926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterBaseToken(const FilterTokenRequest& request)
5725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
573926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
574926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, baseTag));
5755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
576926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return eraseAttributeIfInjected(request, hrefAttr);
5775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
579926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterFormToken(const FilterTokenRequest& request)
5805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
581926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
582926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, formTag));
5835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
58409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)    return eraseAttributeIfInjected(request, actionAttr, kURLWithUniqueOrigin);
5855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
5865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
587926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterInputToken(const FilterTokenRequest& request)
588926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
589926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
590926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, inputTag));
591926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
5925d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);
593926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
594926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
595926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request)
596926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
597926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(request.token.type() == HTMLToken::StartTag);
598926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    ASSERT(hasName(request.token, buttonTag));
599926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
6005d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);
601926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
602926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
603926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request)
6045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
6053c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
6065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
6075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    bool didBlockScript = false;
608926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    for (size_t i = 0; i < request.token.attributes().size(); ++i) {
6095d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        bool eraseAttribute = false;
6105d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        bool valueContainsJavaScriptURL = false;
611926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        const HTMLToken::Attribute& attribute = request.token.attributes().at(i);
6125d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        // FIXME: Don't create a new String for every attribute.value in the document.
6135d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (isNameOfInlineEventHandler(attribute.name)) {
6145d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation));
6155d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        } else if (isSemicolonSeparatedAttribute(attribute)) {
6165d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            String subValue = semicolonSeparatedValueContainingJavaScriptURL(String(attribute.value));
6175d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            if (!subValue.isEmpty()) {
6185d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)                valueContainsJavaScriptURL = true;
6195d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)                eraseAttribute = isContainedInRequest(canonicalize(nameFromAttribute(request, attribute), NoTruncation))
6205d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)                    && isContainedInRequest(canonicalize(subValue, ScriptLikeAttributeTruncation));
6215d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            }
62276c265b59aa821ccbf8c75ab2bb0d036e97d2956Torne (Richard Coles)        } else if (protocolIsJavaScript(stripLeadingAndTrailingHTMLSpaces(String(attribute.value)))) {
62376c265b59aa821ccbf8c75ab2bb0d036e97d2956Torne (Richard Coles)            valueContainsJavaScriptURL = true;
62476c265b59aa821ccbf8c75ab2bb0d036e97d2956Torne (Richard Coles)            eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation));
6255d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        }
6265d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (!eraseAttribute)
6275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            continue;
628926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        request.token.eraseValueOfAttribute(i);
6295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (valueContainsJavaScriptURL)
630926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            request.token.appendToAttributeValue(i, safeJavaScriptURL);
6315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        didBlockScript = true;
6325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
6335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return didBlockScript;
6345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
6355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
6365d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, TruncationKind treatment)
6375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
6385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t indexOfAttribute = 0;
6395d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (!findAttributeWithName(request.token, attributeName, indexOfAttribute))
6405d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        return false;
6415d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6425d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute);
6435d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), treatment)))
6445d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        return false;
6455d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6465d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (threadSafeMatch(attributeName, srcAttr)) {
6475d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (isLikelySafeResource(String(attribute.value)))
6485d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            return false;
6495d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    } else if (threadSafeMatch(attributeName, http_equivAttr)) {
6505d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (!isDangerousHTTPEquiv(String(attribute.value)))
6515d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            return false;
6525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
6535d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6545d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    request.token.eraseValueOfAttribute(indexOfAttribute);
6555d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (!replacementValue.isEmpty())
6565d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        request.token.appendToAttributeValue(indexOfAttribute, replacementValue);
6575d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6585d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return true;
6595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
6605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
6615d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)String XSSAuditor::canonicalizedSnippetForTagName(const FilterTokenRequest& request)
6625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
6635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").
6645d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return canonicalize(request.sourceTracker.sourceForToken(request.token).substring(0, request.token.name().size() + 1), NoTruncation);
6655d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)}
6665d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6675d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute)
6685d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles){
6695d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // The range inlcudes the character which terminates the name. So,
6705d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // for an input of |name="value"|, the snippet is |name=|.
6715d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    int start = attribute.nameRange.start - request.token.startIndex();
6725d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    int end = attribute.valueRange.start - request.token.startIndex();
6735d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return request.sourceTracker.sourceForToken(request.token).substring(start, end - start);
6745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
6755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
6765d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)String XSSAuditor::snippetFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute)
6775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
6785d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    // The range doesn't include the character which terminates the value. So,
6795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // for an input of |name="value"|, the snippet is |name="value|. For an
6805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // unquoted input of |name=value |, the snippet is |name=value|.
6815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // FIXME: We should grab one character before the name also.
682926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    int start = attribute.nameRange.start - request.token.startIndex();
683926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    int end = attribute.valueRange.end - request.token.startIndex();
6845d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return request.sourceTracker.sourceForToken(request.token).substring(start, end - start);
6855d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)}
6865d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6875d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)String XSSAuditor::canonicalize(String snippet, TruncationKind treatment)
6885d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles){
6895d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    String decodedSnippet = fullyDecodeString(snippet, m_encoding);
6905d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6915d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    if (treatment != NoTruncation) {
6925d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        decodedSnippet.truncate(kMaximumFragmentLengthTarget);
6935d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        if (treatment == SrcLikeAttributeTruncation)
6945d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            truncateForSrcLikeAttribute(decodedSnippet);
6955d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        else if (treatment == ScriptLikeAttributeTruncation)
6965d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)            truncateForScriptLikeAttribute(decodedSnippet);
6975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
6985d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
6995d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)    return decodedSnippet.removeCharacters(&isNonCanonicalCharacter);
7005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
7015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
7025d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)String XSSAuditor::canonicalizedSnippetForJavaScript(const FilterTokenRequest& request)
7035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
704926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    String string = request.sourceTracker.sourceForToken(request.token);
7055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t startPosition = 0;
7065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t endPosition = string.length();
70706f816c7c76bc45a15e452ade8a34e8af077693eTorne (Richard Coles)    size_t foundPosition = kNotFound;
70807a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch    size_t lastNonSpacePosition = kNotFound;
7095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
7105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Skip over initial comments to find start of code.
7115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (startPosition < endPosition) {
71206f816c7c76bc45a15e452ade8a34e8af077693eTorne (Richard Coles)        while (startPosition < endPosition && isHTMLSpace<UChar>(string[startPosition]))
7135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            startPosition++;
7145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
7155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Under SVG/XML rules, only HTML comment syntax matters and the parser returns
7165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // these as a separate comment tokens. Having consumed whitespace, we need not look
7175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // further for these.
718926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        if (request.shouldAllowCDATA)
7195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            break;
7205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
7215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML
7225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // comment ends at the end of the line, not with -->.
7235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) {
7245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            while (startPosition < endPosition && !isJSNewline(string[startPosition]))
7255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                startPosition++;
7265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        } else if (startsMultiLineCommentAt(string, startPosition)) {
72706f816c7c76bc45a15e452ade8a34e8af077693eTorne (Richard Coles)            if (startPosition + 2 < endPosition && (foundPosition = string.find("*/", startPosition + 2)) != kNotFound)
7285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                startPosition = foundPosition + 2;
7295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            else
7305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                startPosition = endPosition;
7315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        } else
7325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            break;
7335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
7345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
735926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    String result;
736926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    while (startPosition < endPosition && !result.length()) {
73707a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma,
73807a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        // when we hit an opening <script> tag, or when we exceed the maximum length target. The comma rule
73907a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        // covers a common parameter concatenation case performed by some web servers.
74007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch        lastNonSpacePosition = kNotFound;
741926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) {
742926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            if (!request.shouldAllowCDATA) {
74307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                if (startsSingleLineCommentAt(string, foundPosition)
74407a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                    || startsMultiLineCommentAt(string, foundPosition)
74507a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                    || startsHTMLCommentAt(string, foundPosition)) {
746926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                    break;
747926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)                }
7485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            }
74907a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            if (string[foundPosition] == ',')
75007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                break;
75107a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch
75207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            if (lastNonSpacePosition != kNotFound && startsOpeningScriptTagAt(string, foundPosition)) {
75307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                foundPosition = lastNonSpacePosition;
7545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                break;
7555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            }
75607a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {
75707a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                // After hitting the length target, we can only stop at a point where we know we are
75807a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                // not in the middle of a %-escape sequence. For the sake of simplicity, approximate
75907a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on
76007a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                // whitespace only. We should have enough text in these cases to avoid false positives.
76107a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                if (isHTMLSpace<UChar>(string[foundPosition]))
76207a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                    break;
76307a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            }
76407a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch            if (!isHTMLSpace<UChar>(string[foundPosition]))
76507a852d8c1953036774d8f3b65d18dcfea3bb4a2Ben Murdoch                lastNonSpacePosition = foundPosition;
7665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
7675d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        result = canonicalize(string.substring(startPosition, foundPosition - startPosition), NoTruncation);
768926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        startPosition = foundPosition + 1;
769926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    }
7705d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)
771926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return result;
7725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
7735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
7745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)bool XSSAuditor::isContainedInRequest(const String& decodedSnippet)
7755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
7765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (decodedSnippet.isEmpty())
7775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return false;
77806f816c7c76bc45a15e452ade8a34e8af077693eTorne (Richard Coles)    if (m_decodedURL.find(decodedSnippet, 0, false) != kNotFound)
7795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return true;
7805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(decodedSnippet))
7815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return false;
78206f816c7c76bc45a15e452ade8a34e8af077693eTorne (Richard Coles)    return m_decodedHTTPBody.find(decodedSnippet, 0, false) != kNotFound;
7835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
7845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
7855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)bool XSSAuditor::isLikelySafeResource(const String& url)
7865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
7875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Give empty URLs and about:blank a pass. Making a resourceURL from an
7885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // empty string below will likely later fail the "no query args test" as
7895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // it inherits the document's query args.
7905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (url.isEmpty() || url == blankURL().string())
7915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return true;
7925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
7935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // If the resource is loaded from the same host as the enclosing page, it's
7945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // probably not an XSS attack, so we reduce false positives by allowing the
7955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // request, ignoring scheme and port considerations. If the resource has a
7965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // query string, we're more suspicious, however, because that's pretty rare
7975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // and the attacker might be able to trick a server-side script into doing
79802772c6a72f1ee0b226341a4f4439970c29fc861Ben Murdoch    // something dangerous with the query string.
799926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    if (m_documentURL.host().isEmpty())
8005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return false;
8015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
802926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    KURL resourceURL(m_documentURL, url);
803926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return (m_documentURL.host() == resourceURL.host() && resourceURL.query().isEmpty());
804926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)}
805926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
806926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)bool XSSAuditor::isSafeToSendToAnotherThread() const
807926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){
808926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    return m_documentURL.isSafeToSendToAnotherThread()
809926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        && m_decodedURL.isSafeToSendToAnotherThread()
81009380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        && m_decodedHTTPBody.isSafeToSendToAnotherThread()
81109380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)        && m_httpBodyAsString.isSafeToSendToAnotherThread();
8125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
8135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
814c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)} // namespace blink
815