// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
28 29package org.owasp.html; 30 31import java.io.File; 32import java.io.StringReader; 33import java.util.List; 34import java.util.ListIterator; 35 36import org.w3c.dom.Node; 37import org.xml.sax.InputSource; 38 39import com.google.common.base.Charsets; 40import com.google.common.io.Files; 41 42import nu.validator.htmlparser.dom.HtmlDocumentBuilder; 43 44public class Benchmark { 45 46 public static void main(String[] args) throws Exception { 47 String html = Files.toString(new File(args[0]), Charsets.UTF_8); 48 49 boolean timeLibhtmlparser = true; 50 boolean timeSanitize = true; 51 boolean timePolicyBuilder = true; 52 53 if (args.length > 1) { 54 String s = args[1]; 55 timeLibhtmlparser = s.contains("h"); 56 timeSanitize = s.contains("s"); 57 timePolicyBuilder = s.contains("p"); 58 } 59 60 int n = 0; // Defeat optimizations. 61 62 if (timeLibhtmlparser) { 63 for (int i = 100; --i >= 0;) { 64 n += parseUsingLibhtmlparser(html); 65 } 66 } 67 68 if (timeSanitize) { 69 for (int i = 100; --i >= 0;) { 70 n += sanitize(html).length(); 71 } 72 } 73 74 if (timePolicyBuilder) { 75 for (int i = 100; --i >= 0;) { 76 n += sanitizeUsingPolicyBuilder(html).length(); 77 } 78 } 79 80 long t0 = 0, t1 = -1; 81 if (timeLibhtmlparser) { 82 t0 = System.nanoTime(); 83 for (int i = 100; --i >= 0;) { 84 n += parseUsingLibhtmlparser(html); 85 } 86 t1 = System.nanoTime(); 87 } 88 89 long t2 = 0, t3 = -1; 90 if (timeSanitize) { 91 t2 = System.nanoTime(); 92 for (int i = 100; --i >= 0;) { 93 n += sanitize(html).length(); 94 } 95 t3 = System.nanoTime(); 96 } 97 98 long t4 = 0, t5 = -1; 99 if (timePolicyBuilder) { 100 t4 = System.nanoTime(); 101 for (int i = 100; --i >= 0;) { 102 n += sanitizeUsingPolicyBuilder(html).length(); 103 } 104 t5 = System.nanoTime(); 105 } 106 107 // Defeat optimization by using n. 
108 if (n < 0) { 109 throw new AssertionError("Oh noes underflow"); 110 } 111 112 if (timeLibhtmlparser) { 113 System.err.println(String.format( 114 "Tree parse : %12d", (t1 - t0))); 115 } 116 if (timeSanitize) { 117 System.err.println(String.format( 118 "Full sanitize custom : %12d", (t3 - t2))); 119 } 120 if (timePolicyBuilder) { 121 System.err.println(String.format( 122 "Full sanitize w/ PB : %12d", (t5 - t4))); 123 } 124 } 125 126 private static int parseUsingLibhtmlparser(String html) throws Exception { 127 HtmlDocumentBuilder parser = new HtmlDocumentBuilder(); 128 Node node = parser.parse(new InputSource(new StringReader(html))); 129 return System.identityHashCode(node) >> 24; 130 } 131 132 private static String sanitize(String html) throws Exception { 133 StringBuilder sb = new StringBuilder(html.length()); 134 135 final HtmlStreamRenderer renderer = HtmlStreamRenderer.create( 136 sb, new Handler<String>() { 137 138 public void handle(String x) { 139 throw new AssertionError(x); 140 } 141 }); 142 143 HtmlSanitizer.sanitize(html, new HtmlSanitizer.Policy() { 144 145 public void openDocument() { 146 renderer.openDocument(); 147 } 148 149 public void closeDocument() { 150 renderer.closeDocument(); 151 } 152 153 public void text(String textChunk) { 154 renderer.text(textChunk); 155 } 156 157 public void openTag(String elementName, List<String> attrs) { 158 if ("a".equals(elementName)) { 159 for (ListIterator<String> it = attrs.listIterator(); it.hasNext();) { 160 String name = it.next(); 161 if ("href".equals(name)) { 162 it.next(); 163 } else { 164 it.remove(); 165 it.next(); 166 it.remove(); 167 } 168 } 169 renderer.openTag(elementName, attrs); 170 } 171 } 172 173 public void closeTag(String elementName) { 174 if ("a".equals(elementName)) { 175 renderer.closeTag(elementName); 176 } 177 } 178 }); 179 return sb.toString(); 180 } 181 182 private static HtmlPolicyBuilder policyBuilder; 183 184 private static String sanitizeUsingPolicyBuilder(String html) 185 
throws Exception { 186 if (policyBuilder == null) { 187 policyBuilder = new HtmlPolicyBuilder() 188 .allowStandardUrlProtocols() 189 .allowElements("a") 190 .allowAttributes("href").onElements("a"); 191 } 192 193 StringBuilder sb = new StringBuilder(html.length()); 194 195 HtmlStreamRenderer renderer = HtmlStreamRenderer.create( 196 sb, new Handler<String>() { 197 public void handle(String x) { 198 throw new AssertionError(x); 199 } 200 }); 201 202 HtmlSanitizer.sanitize(html, policyBuilder.build(renderer)); 203 return sb.toString(); 204 } 205 206} 207