1f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// Copyright (c) 2011, Mike Samuel
2f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// All rights reserved.
3f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel//
4f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// Redistribution and use in source and binary forms, with or without
5f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// modification, are permitted provided that the following conditions
6f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// are met:
7f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel//
8f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// Redistributions of source code must retain the above copyright
9f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// notice, this list of conditions and the following disclaimer.
10f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// Redistributions in binary form must reproduce the above copyright
11f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// notice, this list of conditions and the following disclaimer in the
12f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// documentation and/or other materials provided with the distribution.
13f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// Neither the name of the OWASP nor the names of its contributors may
14f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// be used to endorse or promote products derived from this software
15f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// without specific prior written permission.
16f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel// POSSIBILITY OF SUCH DAMAGE.
28f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
29f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelpackage org.owasp.html;
30f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
31f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport com.google.common.base.Function;
32f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport com.google.common.collect.Lists;
33f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
34489a0ec7301a86af8497d24748336db09ca278damikesamuelimport java.io.IOException;
35f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport java.io.StringReader;
36f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport java.util.List;
37f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport java.util.Random;
38f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
39f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport org.w3c.dom.Attr;
40f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport org.w3c.dom.NamedNodeMap;
41f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport org.w3c.dom.Node;
42f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport org.xml.sax.InputSource;
43489a0ec7301a86af8497d24748336db09ca278damikesamuelimport org.xml.sax.SAXException;
44f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
45f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuelimport nu.validator.htmlparser.dom.HtmlDocumentBuilder;
46f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
47f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel/**
485a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuel * Throws random policy calls to find evidence against the claim that the
495a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuel * security of the policy is decoupled from that of the parser.
505a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuel * This test is stochastic -- not guaranteed to pass or fail consistently.
515a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuel * If you see a failure, please report it along with the seed from the output.
525a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuel * If you want to repeat a failure, set the system property "junit.seed".
535a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuel *
545a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuel * @author Mike Samuel <mikesamuel@gmail.com>
55f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel */
565a047cbf3149f42b3e3309b1785ed0dc05d21ad4mikesamuelpublic class HtmlPolicyBuilderFuzzerTest extends FuzzyTestCase {
57f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
58f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  final Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy> policyFactory
59f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      = new HtmlPolicyBuilder()
60b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel      .allowElements("a", "b", "xmp", "pre")
61f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      .allowAttributes("href").onElements("a")
62f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      .allowAttributes("title").globally()
63f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      .allowStandardUrlProtocols()
64f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      .toFactory();
65f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
66f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  static final String[] CHUNKS = {
67f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "Hello, World!", "<b>", "</b>",
68f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "<a onclick='doEvil()' href=javascript:alert(1337)>", "</a>",
69f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "<script>", "</script>", "<xmp>", "</xmp>", "javascript:alert(1337)",
70f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "<style>", "</style>", "<plaintext>", "<!--", "-->", "<![CDATA[", "]]>",
71f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  };
72f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
73f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  static final String[] ELEMENT_NAMES = {
74f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "a", "A",
75f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "b", "B",
76f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "script", "SCRipT",
77f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "style", "STYLE",
78f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "object", "Object",
79f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "noscript", "noScript",
80f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "xmp", "XMP",
81f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  };
82f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
83f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  static final String[] ATTR_NAMES = {
84f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    "href", "id", "class", "onclick", "checked", "style",
85f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  };
86f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
87489a0ec7301a86af8497d24748336db09ca278damikesamuel  public final void testFuzzedOutput() throws IOException, SAXException {
88489a0ec7301a86af8497d24748336db09ca278damikesamuel    boolean passed = false;
89f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    try {
90f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      for (int i = 1000; --i >= 0;) {
91f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        StringBuilder sb = new StringBuilder();
92f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        HtmlSanitizer.Policy policy = policyFactory.apply(
93f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            HtmlStreamRenderer.create(sb, Handler.DO_NOTHING));
94f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        policy.openDocument();
95f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        List<String> attributes = Lists.newArrayList();
96f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        for (int j = 50; --j >= 0;) {
97489a0ec7301a86af8497d24748336db09ca278damikesamuel          int r = rnd.nextInt(3);
98489a0ec7301a86af8497d24748336db09ca278damikesamuel          switch (r) {
99f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            case 0:
100f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              attributes.clear();
101f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              if (rnd.nextBoolean()) {
102f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel                for (int k = rnd.nextInt(4); --k >= 0;) {
103f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel                  attributes.add(pick(rnd, ATTR_NAMES));
104f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel                  attributes.add(pickChunk(rnd));
105f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel                }
106f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              }
107f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              policy.openTag(pick(rnd, ELEMENT_NAMES), attributes);
108f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              break;
109f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            case 1:
110f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              policy.closeTag(pick(rnd, ELEMENT_NAMES));
111f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              break;
112f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            case 2:
113f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              policy.text(pickChunk(rnd));
114f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel              break;
115489a0ec7301a86af8497d24748336db09ca278damikesamuel            default:
116489a0ec7301a86af8497d24748336db09ca278damikesamuel              throw new AssertionError(
117489a0ec7301a86af8497d24748336db09ca278damikesamuel                  "Randomly chosen number in [0-3) was " + r);
118f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel          }
119f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        }
120f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        policy.closeDocument();
121f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
122f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        String html = sb.toString();
123f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        HtmlDocumentBuilder parser = new HtmlDocumentBuilder();
124f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        Node node = parser.parseFragment(
125f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            new InputSource(new StringReader(html)), "body");
126f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        checkSafe(node, html);
127f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      }
128489a0ec7301a86af8497d24748336db09ca278damikesamuel      passed = true;
129489a0ec7301a86af8497d24748336db09ca278damikesamuel    } finally {
130489a0ec7301a86af8497d24748336db09ca278damikesamuel      if (!passed) {
131489a0ec7301a86af8497d24748336db09ca278damikesamuel        System.err.println("Using seed " + seed + "L");
132489a0ec7301a86af8497d24748336db09ca278damikesamuel      }
133f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    }
134f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  }
135f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
136f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  private static void checkSafe(Node node, String html) {
137f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    switch (node.getNodeType()) {
138f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      case Node.ELEMENT_NODE:
139f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        String name = node.getNodeName();
140b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel        if (!"a".equals(name) && !"b".equals(name) && !"pre".equals(name)) {
141f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel          fail("Illegal element name " + name + " : " + html);
142f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        }
143f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        NamedNodeMap attrs = node.getAttributes();
144f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        for (int i = 0, n = attrs.getLength(); i < n; ++i) {
145f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel          Attr a = (Attr) attrs.item(i);
146f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel          if ("title".equals(a.getName())) {
147f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            // ok
148f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel          } else if ("href".equals(a.getName())) {
149f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            assertEquals(html, "a", name);
150f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel            assertFalse(
151f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel                html, Strings.toLowerCase(a.getValue()).contains("script:"));
152f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel          }
153f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        }
154f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel        break;
155f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    }
156f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    for (Node child = node.getFirstChild(); child != null;
157f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel         child = child.getNextSibling()) {
158f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      checkSafe(child, html);
159f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    }
160f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  }
161f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
162f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  private static String pick(Random rnd, String[] choices) {
163f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    return choices[rnd.nextInt(choices.length)];
164f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  }
165f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
166f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  private static String pickChunk(Random rnd) {
167f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    String chunk = pick(rnd, CHUNKS);
168f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    int start = 0;
169f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    int end = chunk.length();
170f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    if (rnd.nextBoolean()) {
171f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      start = rnd.nextInt(end - 1);
172f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    }
173f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    if (end - start < 2 && rnd.nextBoolean()) {
174f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      end = start + rnd.nextInt(end - start);
175f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    }
176f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    return chunk.substring(start, end);
177f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  }
178f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel}
179