// Copyright (c) 2011, Mike Samuel // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // // Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // Neither the name of the OWASP nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. 
package org.owasp.html; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import com.google.common.collect.Sets; /** * Conveniences for configuring policies for the {@link HtmlSanitizer}. * *
* To create a policy, first construct an instance of this class; then call
* {@code allow…} methods to turn on tags, attributes, and other
* processing modes; and finally call {@code build()} or
* {@code toFactory()}.
*
* // Define the policy. * Function* *policyDefinition * = new HtmlPolicyBuilder() * .allowElements("a", "p") * .allowAttributesOnElement("a", "href") * .toFactory(); * * // Sanitize your output. * HtmlSanitizer.sanitize(myHtml, policyDefinition.apply(myHtmlStreamRenderer)); *
* Embedded URLs are filtered by * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}. * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy} * so you can easily white-list widely used policies that don't violate the * current page's origin. See "Customization" below for ways to do further * filtering. If you allow links it might be worthwhile to * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require} * {@code rel=nofollow}. *
** This class simply throws out all embedded JS. * Use a custom element or attribute policy to allow through * signed or otherwise known-safe code. * Check out the Caja project if you need a way to contain third-party JS. *
** This class does not attempt to faithfully parse and sanitize CSS. * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option * that allows through a few CSS properties that allow textual styling, but that * disallow image loading, history stealing, layout breaking, code execution, * etc. *
* *
* You can easily do custom processing on tags and attributes by supplying your
* own {@link ElementPolicy element policy} or
* {@link AttributePolicy attribute policy} when calling
* {@code allow…}.
* E.g. to convert headers into {@code <div>}s, try
* new HtmlPolicyBuilder() * .allowElement( * new ElementPolicy() { * public String apply(String elementName, List&lt;String&gt; attributes) { * attributes.add("class"); * attributes.add("header-" + elementName); * return "div"; * } * }, * "h1", "h2", "h3", "h4", "h5", "h6") * .build(outputChannel) *
* Throughout this class, several rules hold: *
disallow…
methods, but those reverse
* allows instead of rolling back overly permissive defaults.
* * This class is not thread-safe. The resulting policy will not violate its * security guarantees as a result of race conditions, but is not thread safe * because it maintains state to track whether text inside disallowed elements * should be suppressed. *
* The resulting policy can be reused, but if you use the * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then * binding policies to output channels is cheap so there's no need. *
* * @author Mike Samueltype
which have
* different meanings on different attributes.
*/
/**
 * Allows the named attributes on any element by registering them with
 * {@link AttributePolicy#IDENTITY_ATTRIBUTE_POLICY}.
 *
 * @param attributeNames names of the attributes to allow.
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder allowAttributesGlobally(String... attributeNames) {
  // Delegate to the policy-taking overload with the identity policy.
  AttributePolicy identityPolicy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
  return allowAttributesGlobally(identityPolicy, attributeNames);
}
/**
 * Disallows the named attributes on any element.
 * Because attributes are disallowed unless explicitly allowed, calling this
 * is only useful to reverse an earlier
 * {@link #allowAttributesGlobally allow}.
 * Disallowing an attribute globally also disallows it on specific elements.
 *
 * @param attributeNames names of the attributes to reject.
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder disallowAttributesGlobally(
    String... attributeNames) {
  // Registering the reject-all policy goes through the same code path as
  // an allow.
  AttributePolicy rejectAll = AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY;
  return allowAttributesGlobally(rejectAll, attributeNames);
}
/**
 * Allows the named attributes on any element, provided that the attribute
 * value matches the pattern in its entirety.
 *
 * @param p A pattern that the attribute value must match.
 * @param attributeNames names of the attributes to allow.
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder allowAttributesGlobally(
    final Pattern p, String... attributeNames) {
  AttributePolicy patternPolicy = new AttributePolicy() {
    public @Nullable String apply(
        String tagName, String attrName, String attrValue) {
      // Yield the value untouched on a full match, and null otherwise.
      if (p.matcher(attrValue).matches()) {
        return attrValue;
      }
      return null;
    }
  };
  return allowAttributesGlobally(patternPolicy, attributeNames);
}
/**
* Allows the given attributes on any elements as long as the value matches
* the predicate.
*
* @param p A predicate that the attribute value must match.
*/
public HtmlPolicyBuilder allowAttributesGlobally(
final Predicate super String> p, String... attributeNames) {
return allowAttributesGlobally(
new AttributePolicy() {
public @Nullable String apply(
String elementName, String attributeName, String value) {
return p.apply(value) ? value : null;
}
}, attributeNames);
}
/**
 * Allows the given attributes on any elements.
 * Global attribute policies are applied after element specific policies.
 * Be careful of using this with attributes like {@code type} which have
 * different meanings on different elements.
 * Also be careful of globally allowing attributes like {@code href}
 * which can have more far-reaching effects on tags like
 * {@code <base>} and {@code <link>} than on {@code <a>} because in the
 * former, they have an effect without user interaction and can change the
 * behavior of the current page.
 *
 * @param policy Can allow, specify a different value for, or deny the
 *     attribute.
 * @param attributeNames names of the attributes the policy applies to;
 *     each is canonicalized before being registered.
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder allowAttributesGlobally(
    AttributePolicy policy, String... attributeNames) {
  invalidateCompiledState();
  for (String rawName : attributeNames) {
    String canonName = HtmlLexer.canonicalName(rawName);
    // The identity policy is the default passed from the policy-less
    // overload.  It is reinterpreted later via policy joining; nothing
    // special is done here because, until build() is called, we cannot
    // know whether the policy author wants to allow certain URL protocols
    // or wants to deal with styles.
    AttributePolicy existing = globalAttrPolicies.get(canonName);
    globalAttrPolicies.put(
        canonName, AttributePolicy.Util.join(existing, policy));
  }
  return this;
}
/**
 * Allows the named attributes on the given element by registering them with
 * {@link AttributePolicy#IDENTITY_ATTRIBUTE_POLICY}.
 *
 * @param elementName name of the element on which the attributes are
 *     allowed.
 * @param attributeNames names of the attributes to allow.
 * @return the builder produced by the policy-taking overload.
 */
public HtmlPolicyBuilder allowAttributesOnElement(
    String elementName, String... attributeNames) {
  AttributePolicy identityPolicy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
  return allowAttributesOnElement(
      identityPolicy, elementName, attributeNames);
}
/**
 * Allows the given attributes on the given element as long as the value
 * matches the pattern in its entirety.
 *
 * @param p A pattern that the attribute value must match.
 * @param elementName name of the element on which the attributes are
 *     allowed.
 * @param attributeNames names of the attributes to allow.
 * @return the builder produced by the policy-taking overload.
 */
public HtmlPolicyBuilder allowAttributesOnElement(
    final Pattern p, String elementName, String... attributeNames) {
  AttributePolicy patternPolicy = new AttributePolicy() {
    public @Nullable String apply(
        String tagName, String attrName, String attrValue) {
      // Yield the value untouched on a full match, and null otherwise.
      if (p.matcher(attrValue).matches()) {
        return attrValue;
      }
      return null;
    }
  };
  return allowAttributesOnElement(
      patternPolicy, elementName, attributeNames);
}
/**
* Allows the given attributes on the given element as long as the value
* matches the predicate.
*
* @param p A predicate that the attribute value must match.
*/
public HtmlPolicyBuilder allowAttributesOnElement(
final Predicate super String> p, String elementName,
String... attributeNames) {
return allowAttributesOnElement(
new AttributePolicy() {
public @Nullable String apply(
String elementName, String attributeName, String value) {
return p.apply(value) ? value : null;
}
}, elementName, attributeNames);
}
/**
* Allows the named attributes on the given element.
*
* @param policy Can allow, specify a different value for, or deny the
* attribute.
*/
public HtmlPolicyBuilder allowAttributesOnElement(
AttributePolicy policy, String elementName, String... attributeNames) {
invalidateCompiledState();
elementName = HtmlLexer.canonicalName(elementName);
Map
* Attributes are disallowed by default, so there is no need to call this
* with a laundry list of attribute/element pairs.
*/
/**
 * Registers {@link AttributePolicy#REJECT_ALL_ATTRIBUTE_POLICY} for the
 * named attributes on the given element.
 *
 * @param elementName name of the element on which the attributes are
 *     rejected.
 * @param attributeNames names of the attributes to reject.
 * @return the builder produced by the policy-taking overload.
 */
public HtmlPolicyBuilder disallowAttributesOnElement(
    String elementName, String... attributeNames) {
  AttributePolicy rejectAll = AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY;
  return allowAttributesOnElement(rejectAll, elementName, attributeNames);
}
/**
 * Adds {@code rel=nofollow} to links.
 *
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder requireRelNofollowOnLinks() {
  // Any previously compiled state is invalidated before the option changes.
  invalidateCompiledState();
  requireRelNofollowOnLinks = true;
  return this;
}
/**
 * Adds to the set of protocols that are allowed in URL attributes.
 * For each URL attribute that is allowed, we further constrain it by
 * only allowing the value through if it specifies no protocol, or if it
 * specifies one in the allowedProtocols white-list.
 * This is done regardless of whether any protocols have been allowed, so
 * allowing the attribute "href" globally with the identity policy but
 * not white-listing any protocols, effectively disallows the "href"
 * attribute globally.
 * <p>
 * Do not allow any {@code *script} such as {@code javascript}
 * protocols if you might use this policy with untrusted code.
 *
 * @param protocols protocol names to allow; each is lower-cased before
 *     being added to the white-list.
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
  invalidateCompiledState();
  // If there is at least one allowed protocol, then allow URLs and
  // add a filter that checks href and src values.
  // Do not allow href and srcs through otherwise, and only allow on images
  // and links.
  for (int i = 0; i < protocols.length; ++i) {
    allowedProtocols.add(Strings.toLowerCase(protocols[i]));
  }
  return this;
}
/**
 * Reverses a decision made by {@link #allowUrlProtocols}.
 *
 * @param protocols protocol names to remove from the white-list; each is
 *     lower-cased before removal.
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
  invalidateCompiledState();
  for (int i = 0; i < protocols.length; ++i) {
    allowedProtocols.remove(Strings.toLowerCase(protocols[i]));
  }
  return this;
}
/**
 * A canned URL protocol policy that allows {@code http}, {@code https},
 * and {@code mailto}.
 *
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder allowStandardUrlProtocols() {
  String[] standardProtocols = { "http", "https", "mailto" };
  return allowUrlProtocols(standardProtocols);
}
/**
 * Convert {@code style="<CSS>"} to simple non-JS containing
 * {@code <font>} tags to allow color, font-size, typeface, and
 * other styling.
 *
 * @return this builder, to permit call chaining.
 */
public HtmlPolicyBuilder allowStyling() {
  // Any previously compiled state is invalidated before the option changes.
  invalidateCompiledState();
  this.allowStyling = true;
  return this;
}
/**
* Names of attributes from HTML 4 whose values are URLs.
* Other attributes, e.g. {@code style}, may contain URLs even though
* their values are not URLs.
*/
private static final Set