1// Copyright (c) 2011, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html;
30
31import java.io.File;
32import java.io.StringReader;
33import java.util.List;
34import java.util.ListIterator;
35
36import org.w3c.dom.Node;
37import org.xml.sax.InputSource;
38
39import com.google.common.base.Charsets;
40import com.google.common.io.Files;
41
42import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
43
44public class Benchmark {
45
46  public static void main(String[] args) throws Exception {
47    String html = Files.toString(new File(args[0]), Charsets.UTF_8);
48
49    boolean timeLibhtmlparser = true;
50    boolean timeSanitize = true;
51    boolean timePolicyBuilder = true;
52
53    if (args.length > 1) {
54      String s = args[1];
55      timeLibhtmlparser = s.contains("h");
56      timeSanitize = s.contains("s");
57      timePolicyBuilder = s.contains("p");
58    }
59
60    int n = 0;  // Defeat optimizations.
61
62    if (timeLibhtmlparser) {
63      for (int i = 100; --i >= 0;) {
64        n += parseUsingLibhtmlparser(html);
65      }
66    }
67
68    if (timeSanitize) {
69      for (int i = 100; --i >= 0;) {
70        n += sanitize(html).length();
71      }
72    }
73
74    if (timePolicyBuilder) {
75      for (int i = 100; --i >= 0;) {
76        n += sanitizeUsingPolicyBuilder(html).length();
77      }
78    }
79
80    long t0 = 0, t1 = -1;
81    if (timeLibhtmlparser) {
82      t0 = System.nanoTime();
83      for (int i = 100; --i >= 0;) {
84        n += parseUsingLibhtmlparser(html);
85      }
86      t1 = System.nanoTime();
87    }
88
89    long t2 = 0, t3 = -1;
90    if (timeSanitize) {
91      t2 = System.nanoTime();
92      for (int i = 100; --i >= 0;) {
93        n += sanitize(html).length();
94      }
95      t3 = System.nanoTime();
96    }
97
98    long t4 = 0, t5 = -1;
99    if (timePolicyBuilder) {
100      t4 = System.nanoTime();
101      for (int i = 100; --i >= 0;) {
102        n += sanitizeUsingPolicyBuilder(html).length();
103      }
104      t5 = System.nanoTime();
105    }
106
107    // Defeat optimization by using n.
108    if (n < 0) {
109      throw new AssertionError("Oh noes underflow");
110    }
111
112    if (timeLibhtmlparser) {
113      System.err.println(String.format(
114          "Tree parse           : %12d", (t1 - t0)));
115    }
116    if (timeSanitize) {
117      System.err.println(String.format(
118          "Full sanitize custom : %12d", (t3 - t2)));
119    }
120    if (timePolicyBuilder) {
121      System.err.println(String.format(
122          "Full sanitize w/ PB  : %12d", (t5 - t4)));
123    }
124  }
125
126  private static int parseUsingLibhtmlparser(String html) throws Exception {
127    HtmlDocumentBuilder parser = new HtmlDocumentBuilder();
128    Node node = parser.parse(new InputSource(new StringReader(html)));
129    return System.identityHashCode(node) >> 24;
130  }
131
132  private static String sanitize(String html) throws Exception {
133    StringBuilder sb = new StringBuilder(html.length());
134
135    final HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
136        sb, new Handler<String>() {
137
138          public void handle(String x) {
139            throw new AssertionError(x);
140          }
141        });
142
143    HtmlSanitizer.sanitize(html, new HtmlSanitizer.Policy() {
144
145      public void openDocument() {
146        renderer.openDocument();
147      }
148
149      public void closeDocument() {
150        renderer.closeDocument();
151      }
152
153      public void text(String textChunk) {
154        renderer.text(textChunk);
155      }
156
157      public void openTag(String elementName, List<String> attrs) {
158        if ("a".equals(elementName)) {
159          for (ListIterator<String> it = attrs.listIterator(); it.hasNext();) {
160            String name = it.next();
161            if ("href".equals(name)) {
162              it.next();
163            } else {
164              it.remove();
165              it.next();
166              it.remove();
167            }
168          }
169          renderer.openTag(elementName, attrs);
170        }
171      }
172
173      public void closeTag(String elementName) {
174        if ("a".equals(elementName)) {
175          renderer.closeTag(elementName);
176        }
177      }
178    });
179    return sb.toString();
180  }
181
182  private static HtmlPolicyBuilder policyBuilder;
183
184  private static String sanitizeUsingPolicyBuilder(String html)
185      throws Exception {
186    if (policyBuilder == null) {
187      policyBuilder = new HtmlPolicyBuilder()
188          .allowStandardUrlProtocols()
189          .allowElements("a")
190          .allowAttributes("href").onElements("a");
191    }
192
193    StringBuilder sb = new StringBuilder(html.length());
194
195    HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
196        sb, new Handler<String>() {
197          public void handle(String x) {
198            throw new AssertionError(x);
199          }
200        });
201
202    HtmlSanitizer.sanitize(html, policyBuilder.build(renderer));
203    return sb.toString();
204  }
205
206}
207