SlashdotPolicyExample.html revision 846d5d0377617bd20ac271a486f07bfe757cc7a2
1<HTML> 2<BODY BGCOLOR="white"> 3<PRE> 4<FONT color="green">001</FONT> // Copyright (c) 2011, Mike Samuel<a name="line.1"></a> 5<FONT color="green">002</FONT> // All rights reserved.<a name="line.2"></a> 6<FONT color="green">003</FONT> //<a name="line.3"></a> 7<FONT color="green">004</FONT> // Redistribution and use in source and binary forms, with or without<a name="line.4"></a> 8<FONT color="green">005</FONT> // modification, are permitted provided that the following conditions<a name="line.5"></a> 9<FONT color="green">006</FONT> // are met:<a name="line.6"></a> 10<FONT color="green">007</FONT> //<a name="line.7"></a> 11<FONT color="green">008</FONT> // Redistributions of source code must retain the above copyright<a name="line.8"></a> 12<FONT color="green">009</FONT> // notice, this list of conditions and the following disclaimer.<a name="line.9"></a> 13<FONT color="green">010</FONT> // Redistributions in binary form must reproduce the above copyright<a name="line.10"></a> 14<FONT color="green">011</FONT> // notice, this list of conditions and the following disclaimer in the<a name="line.11"></a> 15<FONT color="green">012</FONT> // documentation and/or other materials provided with the distribution.<a name="line.12"></a> 16<FONT color="green">013</FONT> // Neither the name of the OWASP nor the names of its contributors may<a name="line.13"></a> 17<FONT color="green">014</FONT> // be used to endorse or promote products derived from this software<a name="line.14"></a> 18<FONT color="green">015</FONT> // without specific prior written permission.<a name="line.15"></a> 19<FONT color="green">016</FONT> // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS<a name="line.16"></a> 20<FONT color="green">017</FONT> // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT<a name="line.17"></a> 21<FONT color="green">018</FONT> // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS<a name="line.18"></a> 22<FONT color="green">019</FONT> // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE<a name="line.19"></a> 23<FONT color="green">020</FONT> // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,<a name="line.20"></a> 24<FONT color="green">021</FONT> // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,<a name="line.21"></a> 25<FONT color="green">022</FONT> // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;<a name="line.22"></a> 26<FONT color="green">023</FONT> // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER<a name="line.23"></a> 27<FONT color="green">024</FONT> // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT<a name="line.24"></a> 28<FONT color="green">025</FONT> // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN<a name="line.25"></a> 29<FONT color="green">026</FONT> // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE<a name="line.26"></a> 30<FONT color="green">027</FONT> // POSSIBILITY OF SUCH DAMAGE.<a name="line.27"></a> 31<FONT color="green">028</FONT> <a name="line.28"></a> 32<FONT color="green">029</FONT> package org.owasp.html.examples;<a name="line.29"></a> 33<FONT color="green">030</FONT> <a name="line.30"></a> 34<FONT color="green">031</FONT> import java.io.IOException;<a name="line.31"></a> 35<FONT color="green">032</FONT> import java.io.InputStreamReader;<a name="line.32"></a> 36<FONT color="green">033</FONT> import java.util.regex.Pattern;<a name="line.33"></a> 37<FONT color="green">034</FONT> <a name="line.34"></a> 38<FONT color="green">035</FONT> import org.owasp.html.Handler;<a name="line.35"></a> 39<FONT color="green">036</FONT> import org.owasp.html.HtmlPolicyBuilder;<a name="line.36"></a> 40<FONT color="green">037</FONT> import org.owasp.html.HtmlSanitizer;<a name="line.37"></a> 41<FONT color="green">038</FONT> import org.owasp.html.HtmlStreamEventReceiver;<a name="line.38"></a> 42<FONT color="green">039</FONT> import org.owasp.html.HtmlStreamRenderer;<a name="line.39"></a> 43<FONT color="green">040</FONT> <a name="line.40"></a> 44<FONT color="green">041</FONT> import com.google.common.base.Charsets;<a name="line.41"></a> 45<FONT color="green">042</FONT> import com.google.common.base.Function;<a name="line.42"></a> 46<FONT color="green">043</FONT> import com.google.common.base.Throwables;<a name="line.43"></a> 47<FONT color="green">044</FONT> import com.google.common.io.CharStreams;<a name="line.44"></a> 48<FONT color="green">045</FONT> <a name="line.45"></a> 49<FONT color="green">046</FONT> /**<a name="line.46"></a> 50<FONT color="green">047</FONT> * Based on the<a name="line.47"></a> 51<FONT color="green">048</FONT> * <a href="http://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project#Stage_2_-_Choosing_a_base_policy_file">AntiSamy Slashdot example</a>.<a name="line.48"></a> 52<FONT color="green">049</FONT> * <blockquote><a name="line.49"></a> 53<FONT color="green">050</FONT> * Slashdot (http://www.slashdot.org/) is a techie news site that allows users<a name="line.50"></a> 54<FONT color="green">051</FONT> * to respond anonymously to news posts with very limited HTML markup. Now<a name="line.51"></a> 55<FONT color="green">052</FONT> * Slashdot is not only one of the coolest sites around, it's also one that's<a name="line.52"></a> 56<FONT color="green">053</FONT> * been subject to many different successful attacks. Even more unfortunate is<a name="line.53"></a> 57<FONT color="green">054</FONT> * the fact that most of the attacks led users to the infamous goatse.cx picture<a name="line.54"></a> 58<FONT color="green">055</FONT> * (please don't go look it up). The rules for Slashdot are fairly strict: users<a name="line.55"></a> 59<FONT color="green">056</FONT> * can only submit the following HTML tags and no CSS: {@code <b>}, {@code <u>},<a name="line.56"></a> 60<FONT color="green">057</FONT> * {@code <i>}, {@code <a>}, {@code <blockquote>}.<a name="line.57"></a> 61<FONT color="green">058</FONT> * <br><a name="line.58"></a> 62<FONT color="green">059</FONT> * Accordingly, we've built a policy file that allows fairly similar<a name="line.59"></a> 63<FONT color="green">060</FONT> * functionality. All text-formatting tags that operate directly on the font,<a name="line.60"></a> 64<FONT color="green">061</FONT> * color or emphasis have been allowed.<a name="line.61"></a> 65<FONT color="green">062</FONT> * </blockquote><a name="line.62"></a> 66<FONT color="green">063</FONT> */<a name="line.63"></a> 67<FONT color="green">064</FONT> public class SlashdotPolicyExample {<a name="line.64"></a> 68<FONT color="green">065</FONT> <a name="line.65"></a> 69<FONT color="green">066</FONT> public static final Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy><a name="line.66"></a> 70<FONT color="green">067</FONT> POLICY_DEFINITION = new HtmlPolicyBuilder()<a name="line.67"></a> 71<FONT color="green">068</FONT> .allowStandardUrlProtocols()<a name="line.68"></a> 72<FONT color="green">069</FONT> .allowAttributes("title").globally()<a name="line.69"></a> 73<FONT color="green">070</FONT> .allowAttributes("href").onElements("a")<a name="line.70"></a> 74<FONT color="green">071</FONT> .requireRelNofollowOnLinks()<a name="line.71"></a> 75<FONT color="green">072</FONT> .allowAttributes("lang").matching(Pattern.compile("[a-zA-Z]{2,20}"))<a name="line.72"></a> 76<FONT color="green">073</FONT> .globally()<a name="line.73"></a> 77<FONT color="green">074</FONT> .allowAttributes("align")<a name="line.74"></a> 78<FONT color="green">075</FONT> .matching(true, "center", "left", "right", "justify", "char")<a name="line.75"></a> 79<FONT color="green">076</FONT> .onElements("p")<a name="line.76"></a> 80<FONT color="green">077</FONT> .allowElements(<a name="line.77"></a> 81<FONT color="green">078</FONT> "a", "p", "div", "i", "b", "em", "blockquote", "tt", "strong",<a name="line.78"></a> 82<FONT color="green">079</FONT> "br", "ul", "ol", "li")<a name="line.79"></a> 83<FONT color="green">080</FONT> // Custom slashdot tags.<a name="line.80"></a> 84<FONT color="green">081</FONT> // These could be rewritten in the sanitizer using an ElementPolicy.<a name="line.81"></a> 85<FONT color="green">082</FONT> .allowElements("quote", "ecode")<a name="line.82"></a> 86<FONT color="green">083</FONT> .toFactory();<a name="line.83"></a> 87<FONT color="green">084</FONT> <a name="line.84"></a> 88<FONT color="green">085</FONT> public static void main(String[] args) throws IOException {<a name="line.85"></a> 89<FONT color="green">086</FONT> if (args.length == 1) {<a name="line.86"></a> 90<FONT color="green">087</FONT> System.err.println("Reads from STDIN and writes to STDOUT");<a name="line.87"></a> 91<FONT color="green">088</FONT> System.exit(-1);<a name="line.88"></a> 92<FONT color="green">089</FONT> }<a name="line.89"></a> 93<FONT color="green">090</FONT> System.err.println("[Reading from STDIN]");<a name="line.90"></a> 94<FONT color="green">091</FONT> String html = CharStreams.toString(<a name="line.91"></a> 95<FONT color="green">092</FONT> new InputStreamReader(System.in, Charsets.UTF_8));<a name="line.92"></a> 96<FONT color="green">093</FONT> HtmlStreamRenderer renderer = HtmlStreamRenderer.create(<a name="line.93"></a> 97<FONT color="green">094</FONT> System.out,<a name="line.94"></a> 98<FONT color="green">095</FONT> new Handler<IOException>() {<a name="line.95"></a> 99<FONT color="green">096</FONT> public void handle(IOException ex) {<a name="line.96"></a> 100<FONT color="green">097</FONT> Throwables.propagate(ex); // System.out suppresses IOExceptions<a name="line.97"></a> 101<FONT color="green">098</FONT> }<a name="line.98"></a> 102<FONT color="green">099</FONT> },<a name="line.99"></a> 103<FONT color="green">100</FONT> new Handler<String>() {<a name="line.100"></a> 104<FONT color="green">101</FONT> public void handle(String x) {<a name="line.101"></a> 105<FONT color="green">102</FONT> throw new AssertionError(x);<a name="line.102"></a> 106<FONT color="green">103</FONT> }<a name="line.103"></a> 107<FONT color="green">104</FONT> });<a name="line.104"></a> 108<FONT color="green">105</FONT> HtmlSanitizer.sanitize(html, POLICY_DEFINITION.apply(renderer));<a name="line.105"></a> 109<FONT color="green">106</FONT> }<a name="line.106"></a> 110<FONT color="green">107</FONT> }<a name="line.107"></a> 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171</PRE> 172</BODY> 173</HTML> 174