SlashdotPolicyExample.java revision c4058d94a0e30de4532c65c0ec4a1ffd6d6ba26e
1// Copyright (c) 2011, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html.examples;
30
31import java.io.IOException;
32import java.io.InputStreamReader;
33import java.util.regex.Pattern;
34
35import org.owasp.html.Handler;
36import org.owasp.html.HtmlPolicyBuilder;
37import org.owasp.html.HtmlSanitizer;
38import org.owasp.html.HtmlStreamEventReceiver;
39import org.owasp.html.HtmlStreamRenderer;
40
41import com.google.common.base.Charsets;
42import com.google.common.base.Function;
43import com.google.common.base.Throwables;
44import com.google.common.io.CharStreams;
45
46/**
47 * Based on the
48 * <a href="http://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project#Stage_2_-_Choosing_a_base_policy_file">AntiSamy Slashdot example</a>.
49 * <blockquote>
50 * Slashdot (http://www.slashdot.org/) is a techie news site that allows users
51 * to respond anonymously to news posts with very limited HTML markup. Now
52 * Slashdot is not only one of the coolest sites around, it's also one that's
53 * been subject to many different successful attacks. Even more unfortunate is
54 * the fact that most of the attacks led users to the infamous goatse.cx picture
55 * (please don't go look it up). The rules for Slashdot are fairly strict: users
56 * can only submit the following HTML tags and no CSS: {@code <b>}, {@code <u>},
57 * {@code <i>}, {@code <a>}, {@code <blockquote>}.
58 * <br>
59 * Accordingly, we've built a policy file that allows fairly similar
60 * functionality. All text-formatting tags that operate directly on the font,
61 * color or emphasis have been allowed.
62 * </blockquote>
63 */
64public class SlashdotPolicyExample {
65
66  /** A policy definition that matches the minimal HTML that Slashdot allows. */
67  public static final Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy>
68      POLICY_DEFINITION = new HtmlPolicyBuilder()
69          .allowStandardUrlProtocols()
70          // Allow title="..." on any element.
71          .allowAttributes("title").globally()
72          // Allow href="..." on <a> elements.
73          .allowAttributes("href").onElements("a")
74          // Defeat link spammers.
75          .requireRelNofollowOnLinks()
76          // Allow lang= with an alphabetic value on any element.
77          .allowAttributes("lang").matching(Pattern.compile("[a-zA-Z]{2,20}"))
78              .globally()
79          // The align attribute on <p> elements can have any value below.
80          .allowAttributes("align")
81              .matching(true, "center", "left", "right", "justify", "char")
82              .onElements("p")
83          // These elements are allowed.
84          .allowElements(
85              "a", "p", "div", "i", "b", "em", "blockquote", "tt", "strong",
86              "br", "ul", "ol", "li")
87          // Custom slashdot tags.
88          // These could be rewritten in the sanitizer using an ElementPolicy.
89          .allowElements("quote", "ecode")
90          .toFactory();
91
92  public static void main(String[] args) throws IOException {
93    if (args.length != 0) {
94      System.err.println("Reads from STDIN and writes to STDOUT");
95      System.exit(-1);
96    }
97    System.err.println("[Reading from STDIN]");
98    // Fetch the HTML to sanitize.
99    String html = CharStreams.toString(
100        new InputStreamReader(System.in, Charsets.UTF_8));
101    // Set up an output channel to receive the sanitized HTML.
102    HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
103        System.out,
104        // Receives notifications on a failure to write to the output.
105        new Handler<IOException>() {
106          public void handle(IOException ex) {
107            Throwables.propagate(ex);  // System.out suppresses IOExceptions
108          }
109        },
110        // Our HTML parser is very lenient, but this receives notifications on
111        // truly bizarre inputs.
112        new Handler<String>() {
113          public void handle(String x) {
114            throw new AssertionError(x);
115          }
116        });
117    // Use the policy defined above to sanitize the HTML.
118    HtmlSanitizer.sanitize(html, POLICY_DEFINITION.apply(renderer));
119  }
120}
121