// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

3138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel/**
3238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * Pre-packaged HTML sanitizer policies.
3338bb37b955601261fd8945ee22aa09ac30d29298mikesamuel *
3438bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * <p>
3538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * These policies can be used to sanitize content.
3638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * </p>
3738bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * <pre>
386434d0d4455c4afb38b7c9c58c4ad844fb761a3fmikesamuel *   Sanitizers.FORMATTING.sanitize({@code "<b>Hello, World!</b>"})
3938bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * </pre>
4038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * and can be chained
4138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * <pre>
4238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel *   PolicyFactory sanitizer = Sanitizers.FORMATTING.and(Sanitizers.BLOCKS);
436434d0d4455c4afb38b7c9c58c4ad844fb761a3fmikesamuel *   System.out.println(sanitizer.sanitize({@code "<p>Hello, <b>World!</b>"}));
4438bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * </pre>
4538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel *
4638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * <p>
47d7c2f9f6c741b83b880ad878269d18ceb1af1d4amikesamuel * For more fine-grained control over sanitization, use
4838bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * {@link HtmlPolicyBuilder}.
4938bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * </p>
5038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel *
5138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel * @author Mike Samuel <mikesamuel@gmail.com>
5238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel */
5338bb37b955601261fd8945ee22aa09ac30d29298mikesamuelpublic final class Sanitizers {
5438bb37b955601261fd8945ee22aa09ac30d29298mikesamuel
5538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  /**
5638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel   * Allows common formatting elements including {@code <b>}, {@code <i>}, etc.
5738bb37b955601261fd8945ee22aa09ac30d29298mikesamuel   */
5838bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  public static final PolicyFactory FORMATTING = new HtmlPolicyBuilder()
5938bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowCommonInlineFormattingElements().toFactory();
6038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel
6138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  /**
626434d0d4455c4afb38b7c9c58c4ad844fb761a3fmikesamuel   * Allows common block elements including <code>&lt;p&gt;</code>,
636434d0d4455c4afb38b7c9c58c4ad844fb761a3fmikesamuel   * <code>&lt;h1&gt;</code>, etc.
6438bb37b955601261fd8945ee22aa09ac30d29298mikesamuel   */
6538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  public static final PolicyFactory BLOCKS = new HtmlPolicyBuilder()
6638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowCommonBlockElements().toFactory();
6738bb37b955601261fd8945ee22aa09ac30d29298mikesamuel
6838bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  /**
696434d0d4455c4afb38b7c9c58c4ad844fb761a3fmikesamuel   * Allows certain safe CSS properties in {@code style="..."} attributes.
7038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel   */
7138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  public static final PolicyFactory STYLES = new HtmlPolicyBuilder()
7238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowStyling().toFactory();
7338bb37b955601261fd8945ee22aa09ac30d29298mikesamuel
7438bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  /**
7538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel   * Allows HTTP, HTTPS, MAILTO, and relative links.
7638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel   */
7738bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  public static final PolicyFactory LINKS = new HtmlPolicyBuilder()
7838bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowStandardUrlProtocols().allowElements("a")
7938bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowAttributes("href").onElements("a").requireRelNofollowOnLinks()
8038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .toFactory();
8138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel
8238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  private static final AttributePolicy INTEGER = new AttributePolicy() {
83ce5bde40e2e126de05105f09f1f965a5c70aaa94mikesamuel    public String apply(
84ce5bde40e2e126de05105f09f1f965a5c70aaa94mikesamuel        String elementName, String attributeName, String value) {
8538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      int n = value.length();
8638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      if (n == 0) { return null; }
8738bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      for (int i = 0; i < n; ++i) {
8838bb37b955601261fd8945ee22aa09ac30d29298mikesamuel        char ch = value.charAt(i);
8938bb37b955601261fd8945ee22aa09ac30d29298mikesamuel        if (ch == '.') {
9038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel          if (i == 0) { return null; }
9138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel          return value.substring(0, i);  // truncate to integer.
9238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel        } else if (!('0' <= ch && ch <= '9')) {
9338bb37b955601261fd8945ee22aa09ac30d29298mikesamuel          return null;
9438bb37b955601261fd8945ee22aa09ac30d29298mikesamuel        }
9538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      }
9638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      return value;
9738bb37b955601261fd8945ee22aa09ac30d29298mikesamuel    }
9838bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  };
9938bb37b955601261fd8945ee22aa09ac30d29298mikesamuel
10038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  /**
1016434d0d4455c4afb38b7c9c58c4ad844fb761a3fmikesamuel   * Allows {@code <img>} elements from HTTP, HTTPS, and relative sources.
10238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel   */
10338bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  public static final PolicyFactory IMAGES = new HtmlPolicyBuilder()
10438bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowUrlProtocols("http", "https").allowElements("img")
10538bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowAttributes("alt", "src").onElements("img")
10638bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .allowAttributes("border", "height", "width").matching(INTEGER)
10738bb37b955601261fd8945ee22aa09ac30d29298mikesamuel          .onElements("img")
10838bb37b955601261fd8945ee22aa09ac30d29298mikesamuel      .toFactory();
10938bb37b955601261fd8945ee22aa09ac30d29298mikesamuel
11038bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  private Sanitizers() {
11138bb37b955601261fd8945ee22aa09ac30d29298mikesamuel    // Uninstantiable.
11238bb37b955601261fd8945ee22aa09ac30d29298mikesamuel  }
11338bb37b955601261fd8945ee22aa09ac30d29298mikesamuel}
114