1afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com/* 2afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Copyright (c) 2007-2010, Arshan Dabirsiaghi, Jason Li 30f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * 4afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * All rights reserved. 50f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * 6afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 70f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * 8afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 9afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * 12afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 13afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 14afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 15afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 16afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 17afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 18afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 19afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 20afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 21afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com */ 24afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 25afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.compackage org.owasp.html; 26afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 270f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport java.io.IOException; 28afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport java.io.InputStreamReader; 29afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport java.net.URL; 300f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport java.net.URLConnection; 31d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuelimport java.util.regex.Matcher; 32afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport java.util.regex.Pattern; 33afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 340f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport org.apache.commons.codec.binary.Base64; 35afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 360f3a7565157c70edb1935f04888fdc0407397fabmikesamuelimport junit.framework.AssertionFailedError; 37afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport junit.framework.Test; 38afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport junit.framework.TestCase; 39afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.comimport junit.framework.TestSuite; 40afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 414e867904c8295537803c1c8a076e130df5674b58mikesamuel 42afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com/** 43afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * This class tests AntiSamy functionality and the basic policy file which 44afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * should be immune to XSS and CSS phishing attacks. 450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * 46afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com * @author Arshan Dabirsiaghi 470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * 48afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com */ 49afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.compublic class AntiSamyTest extends TestCase { 50afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel static final boolean RUN_KNOWN_FAILURES = false; 52036155387b0beda0993f361b73b020b773e58708mikesamuel static final boolean DISABLE_INTERNETS = false; 530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 544e867904c8295537803c1c8a076e130df5674b58mikesamuel private static HtmlSanitizer.Policy makePolicy(Appendable buffer) { 550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel final HtmlStreamRenderer renderer = HtmlStreamRenderer.create( 560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel buffer, 570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel new Handler<IOException>() { 580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void handle(IOException ex) { 590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel AssertionFailedError failure = new AssertionFailedError(); 600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel failure.initCause(ex); 610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel throw failure; 620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel }, 640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel new Handler<String>() { 650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void handle(String errorMessage) { 660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel fail(errorMessage); 670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel }); 690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 704e867904c8295537803c1c8a076e130df5674b58mikesamuel return new HtmlPolicyBuilder() 714e867904c8295537803c1c8a076e130df5674b58mikesamuel .allowElements( 724e867904c8295537803c1c8a076e130df5674b58mikesamuel "a", "b", "br", "div", "font", "i", "img", "input", "li", 734e867904c8295537803c1c8a076e130df5674b58mikesamuel "ol", "p", "span", "td", "ul") 74846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel .allowAttributes("checked", "type").onElements("input") 75846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel .allowAttributes("color").onElements("font") 76846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel .allowAttributes("href").onElements("a") 77846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel .allowAttributes("src").onElements("img") 78846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel .allowAttributes("class", "id", "title").globally() 79846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel .allowAttributes("char").matching( 804e867904c8295537803c1c8a076e130df5674b58mikesamuel new AttributePolicy() { 814e867904c8295537803c1c8a076e130df5674b58mikesamuel public String apply( 824e867904c8295537803c1c8a076e130df5674b58mikesamuel String elementName, String attributeName, String value) { 834e867904c8295537803c1c8a076e130df5674b58mikesamuel return value.length() == 1 ? value : null; 840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 85846d5d0377617bd20ac271a486f07bfe757cc7a2mikesamuel }).onElements("td") 864e867904c8295537803c1c8a076e130df5674b58mikesamuel .allowStandardUrlProtocols() 874e867904c8295537803c1c8a076e130df5674b58mikesamuel .requireRelNofollowOnLinks() 884e867904c8295537803c1c8a076e130df5674b58mikesamuel .allowStyling() 894e867904c8295537803c1c8a076e130df5674b58mikesamuel .build(renderer); 900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 924e867904c8295537803c1c8a076e130df5674b58mikesamuel private static String sanitize(String html) { 930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel StringBuilder sb = new StringBuilder(); 940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 954e867904c8295537803c1c8a076e130df5674b58mikesamuel HtmlSanitizer.sanitize(html, makePolicy(sb)); 960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel return sb.toString(); 980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel private static final String[] BASE64_BAD_XML_STRINGS = new String[] { 1010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // first string is 1020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // "<a - href=\"http://www.owasp.org\">click here</a>" 1030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "PGEgLSBocmVmPSJodHRwOi8vd3d3Lm93YXNwLm9yZyI+Y2xpY2sgaGVyZTwvYT4=", 1040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // the rest are randomly generated 300 byte sequences which generate 1050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // parser errors, turned into Strings 1060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "uz0sEy5aDiok6oufQRaYPyYOxbtlACRnfrOnUVIbOstiaoB95iw+dJYuO5sI9nudhRtSYLANlcdgO0pRb+65qKDwZ5o6GJRMWv4YajZk+7Q3W/GN295XmyWUpxuyPGVi7d5fhmtYaYNW6vxyKK1Wjn9IEhIrfvNNjtEF90vlERnz3wde4WMaKMeciqgDXuZHEApYmUcu6Wbx4Q6WcNDqohAN/qCli74tvC+Umy0ZsQGU7E+BvJJ1tLfMcSzYiz7Q15ByZOYrA2aa0wDu0no3gSatjGt6aB4h30D9xUP31LuPGZ2GdWwMfZbFcfRgDSh42JPwa1bODmt5cw0Y8ACeyrIbfk9IkX1bPpYfIgtO7TwuXjBbhh2EEixOZ2YkcsvmcOSVTvraChbxv6kP", 1070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "PIWjMV4y+MpuNLtcY3vBRG4ZcNaCkB9wXJr3pghmFA6rVXAik+d5lei48TtnHvfvb5rQZVceWKv9cR/9IIsLokMyN0omkd8j3TV0DOh3JyBjPHFCu1Gp4Weo96h5C6RBoB0xsE4QdS2Y1sq/yiha9IebyHThAfnGU8AMC4AvZ7DDBccD2leZy2Q617ekz5grvxEG6tEcZ3fCbJn4leQVVo9MNoerim8KFHGloT+LxdgQR6YN5y1ii3bVGreM51S4TeANujdqJXp8B7B1Gk3PKCRS2T1SNFZedut45y+/w7wp5AUQCBUpIPUj6RLp+y3byWhcbZbJ70KOzTSZuYYIKLLo8047Fej43bIaghJm0F9yIKk3C5gtBcw8T5pciJoVXrTdBAK/8fMVo29P", 1080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "uCk7HocubT6KzJw2eXpSUItZFGkr7U+D89mJw70rxdqXP2JaG04SNjx3dd84G4bz+UVPPhPO2gBAx2vHI0xhgJG9T4vffAYh2D1kenmr+8gIHt6WDNeD+HwJeAbJYhfVFMJsTuIGlYIw8+I+TARK0vqjACyRwMDAndhXnDrk4E5U3hyjqS14XX0kIDZYM6FGFPXe/s+ba2886Q8o1a7WosgqqAmt4u6R3IHOvVf5/PIeZrBJKrVptxjdjelP8Xwjq2ujWNtR3/HM1kjRlJi4xedvMRe4Rlxek0NDLC9hNd18RYi0EjzQ0bGSDDl0813yv6s6tcT6xHMzKvDcUcFRkX6BbxmoIcMsVeHM/ur6yRv834o/TT5IdiM9/wpkuICFOWIfM+Y8OWhiU6BK", 1090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "Bb6Cqy6stJ0YhtPirRAQ8OXrPFKAeYHeuZXuC1qdHJRlweEzl4F2z/ZFG7hzr5NLZtzrRG3wm5TXl6Aua5G6v0WKcjJiS2V43WB8uY1BFK1d2y68c1gTRSF0u+VTThGjz+q/R6zE8HG8uchO+KPw64RehXDbPQ4uadiL+UwfZ4BzY1OHhvM5+2lVlibG+awtH6qzzx6zOWemTih932Lt9mMnm3FzEw7uGzPEYZ3aBV5xnbQ2a2N4UXIdm7RtIUiYFzHcLe5PZM/utJF8NdHKy0SPaKYkdXHli7g3tarzAabLZqLT4k7oemKYCn/eKRreZjqTB2E8Kc9Swf3jHDkmSvzOYE8wi1vQ3X7JtPcQ2O4muvpSa70NIE+XK1CgnnsL79Qzci1/1xgkBlNq", 1100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "FZNVr4nOICD1cNfAvQwZvZWi+P4I2Gubzrt+wK+7gLEY144BosgKeK7snwlA/vJjPAnkFW72APTBjY6kk4EOyoUef0MxRnZEU11vby5Ru19eixZBFB/SVXDJleLK0z3zXXE8U5Zl5RzLActHakG8Psvdt8TDscQc4MPZ1K7mXDhi7FQdpjRTwVxFyCFoybQ9WNJNGPsAkkm84NtFb4KjGpwVC70oq87tM2gYCrNgMhBfdBl0bnQHoNBCp76RKdpq1UAY01t1ipfgt7BoaAr0eTw1S32DezjfkAz04WyPTzkdBKd3b44rX9dXEbm6szAz0SjgztRPDJKSMELjq16W2Ua8d1AHq2Dz8JlsvGzi2jICUjpFsIfRmQ/STSvOT8VsaCFhwL1zDLbn5jCr", 1110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "RuiRkvYjH2FcCjNzFPT2PJWh7Q6vUbfMadMIEnw49GvzTmhk4OUFyjY13GL52JVyqdyFrnpgEOtXiTu88Cm+TiBI7JRh0jRs3VJRP3N+5GpyjKX7cJA46w8PrH3ovJo3PES7o8CSYKRa3eUs7BnFt7kUCvMqBBqIhTIKlnQd2JkMNnhhCcYdPygLx7E1Vg+H3KybcETsYWBeUVrhRl/RAyYJkn6LddjPuWkDdgIcnKhNvpQu4MMqF3YbzHgyTh7bdWjy1liZle7xR/uRbOrRIRKTxkUinQGEWyW3bbXOvPO71E7xyKywBanwg2FtvzOoRFRVF7V9mLzPSqdvbM7VMQoLFob2UgeNLbVHkWeQtEqQWIV5RMu3+knhoqGYxP/3Srszp0ELRQy/xyyD", 1120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "mqBEVbNnL929CUA3sjkOmPB5dL0/a0spq8LgbIsJa22SfP580XduzUIKnCtdeC9TjPB/GEPp/LvEUFaLTUgPDQQGu3H5UCZyjVTAMHl45me/0qISEf903zFFqW5Lk3TS6iPrithqMMvhdK29Eg5OhhcoHS+ALpn0EjzUe86NywuFNb6ID4o8aF/ztZlKJegnpDAm3JuhCBauJ+0gcOB8GNdWd5a06qkokmwk1tgwWat7cQGFIH1NOvBwRMKhD51MJ7V28806a3zkOVwwhOiyyTXR+EcDA/aq5acX0yailLWB82g/2GR/DiaqNtusV+gpcMTNYemEv3c/xLkClJc29DSfTsJGKsmIDMqeBMM7RRBNinNAriY9iNX1UuHZLr/tUrRNrfuNT5CvvK1K", 1130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "IMcfbWZ/iCa/LDcvMlk6LEJ0gDe4ohy2Vi0pVBd9aqR5PnRj8zGit8G2rLuNUkDmQ95bMURasmaPw2Xjf6SQjRk8coIHDLtbg/YNQVMabE8pKd6EaFdsGWJkcFoonxhPR29aH0xvjC4Mp3cJX3mjqyVsOp9xdk6d0Y2hzV3W/oPCq0DV03pm7P3+jH2OzoVVIDYgG1FD12S03otJrCXuzDmE2LOQ0xwgBQ9sREBLXwQzUKfXH8ogZzjdR19pX9qe0rRKMNz8k5lqcF9R2z+XIS1QAfeV9xopXA0CeyrhtoOkXV2i8kBxyodDp7tIeOvbEfvaqZGJgaJyV8UMTDi7zjwNeVdyKa8USH7zrXSoCl+Ud5eflI9vxKS+u9Bt1ufBHJtULOCHGA2vimkU", 1140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "AqC2sr44HVueGzgW13zHvJkqOEBWA8XA66ZEb3EoL1ehypSnJ07cFoWZlO8kf3k57L1fuHFWJ6quEdLXQaT9SJKHlUaYQvanvjbBlqWwaH3hODNsBGoK0DatpoQ+FxcSkdVE/ki3rbEUuJiZzU0BnDxH+Q6FiNsBaJuwau29w24MlD28ELJsjCcUVwtTQkaNtUxIlFKHLj0++T+IVrQH8KZlmVLvDefJ6llWbrFNVuh674HfKr/GEUatG6KI4gWNtGKKRYh76mMl5xH5qDfBZqxyRaKylJaDIYbx5xP5I4DDm4gOnxH+h/Pu6dq6FJ/U3eDio/KQ9xwFqTuyjH0BIRBsvWWgbTNURVBheq+am92YBhkj1QmdKTxQ9fQM55O8DpyWzRhky0NevM9j", 1150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "qkFfS3WfLyj3QTQT9i/s57uOPQCTN1jrab8bwxaxyeYUlz2tEtYyKGGUufua8WzdBT2VvWTvH0JkK0LfUJ+vChvcnMFna+tEaCKCFMIOWMLYVZSJDcYMIqaIr8d0Bi2bpbVf5z4WNma0pbCKaXpkYgeg1Sb8HpKG0p0fAez7Q/QRASlvyM5vuIOH8/CM4fF5Ga6aWkTRG0lfxiyeZ2vi3q7uNmsZF490J79r/6tnPPXIIC4XGnijwho5NmhZG0XcQeyW5KnT7VmGACFdTHOb9oS5WxZZU29/oZ5Y23rBBoSDX/xZ1LNFiZk6Xfl4ih207jzogv+3nOro93JHQydNeKEwxOtbKqEe7WWJLDw/EzVdJTODrhBYKbjUce10XsavuiTvv+H1Qh4lo2Vx", 1160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "O900/Gn82AjyLYqiWZ4ILXBBv/ZaXpTpQL0p9nv7gwF2MWsS2OWEImcVDa+1ElrjUumG6CVEv/rvax53krqJJDg+4Z/XcHxv58w6hNrXiWqFNjxlu5RZHvj1oQQXnS2n8qw8e/c+8ea2TiDIVr4OmgZz1G9uSPBeOZJvySqdgNPMpgfjZwkL2ez9/x31sLuQxi/FW3DFXU6kGSUjaq8g/iGXlaaAcQ0t9Gy+y005Z9wpr2JWWzishL+1JZp9D4SY/r3NHDphN4MNdLHMNBRPSIgfsaSqfLraIt+zWIycsd+nksVxtPv9wcyXy51E1qlHr6Uygz2VZYD9q9zyxEX4wRP2VEewHYUomL9d1F6gGG5fN3z82bQ4hI9uDirWhneWazUOQBRud5otPOm9", 1170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "C3c+d5Q9lyTafPLdelG1TKaLFinw1TOjyI6KkrQyHKkttfnO58WFvScl1TiRcB/iHxKahskoE2+VRLUIhctuDU4sUvQh/g9Arw0LAA4QTxuLFt01XYdigurz4FT15ox2oDGGGrRb3VGjDTXK1OWVJoLMW95EVqyMc9F+Fdej85LHE+8WesIfacjUQtTG1tzYVQTfubZq0+qxXws8QrxMLFtVE38tbeXo+Ok1/U5TUa6FjWflEfvKY3XVcl8RKkXua7fVz/Blj8Gh+dWe2cOxa0lpM75ZHyz9adQrB2Pb4571E4u2xI5un0R0MFJZBQuPDc1G5rPhyk+Hb4LRG3dS0m8IASQUOskv93z978L1+Abu9CLP6d6s5p+BzWxhMUqwQXC/CCpTywrkJ0RG", 1180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel }; 1190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel @Override 1210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel protected void setUp() throws Exception { 1220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel super.setUp(); 1230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel @Override 1260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel protected void tearDown() throws Exception { 1270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel super.tearDown(); 1280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public static Test suite() { 1310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel TestSuite suite = new TestSuite(AntiSamyTest.class); 1320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel return suite; 1330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testCompareSpeeds() throws Exception { 13671e338dfcc6a93fa5b28c53270e618f6235bba88mikesamuel if (DISABLE_INTERNETS) { return; } 137c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel 1386d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel long totalTime = 0; 1396d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel long averageTime = 0; 1400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel int testReps = 15; 1420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel for (String url : new String[] { 1440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "http://slashdot.org/", "http://www.fark.com/", 1450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "http://www.cnn.com/", "http://google.com/", 1460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "http://www.microsoft.com/en/us/default.aspx", 1470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "http://deadspin.com/", 1480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel }) { 1490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel URLConnection conn = new URL(url).openConnection(); 150d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel String ct = guessCharsetFromContentType(conn.getContentType()); 151489a0ec7301a86af8497d24748336db09ca278damikesamuel InputStreamReader in = new InputStreamReader(conn.getInputStream(), ct); 1520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel StringBuilder out = new StringBuilder(); 1530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel char[] buffer = new char[5000]; 1540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel int read = 0; 1550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel do { 1560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel read = in.read(buffer, 0, buffer.length); 1570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (read > 0) { 1580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel out.append(buffer, 0, read); 1590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } while (read >= 0); 1610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel in.close(); 1630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String html = out.toString(); 1650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel System.out.println("About to scan: " + url + " size: " + html.length()); 1670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (html.length() > 640000) { 1680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel System.out.println(" -Maximum input size 640000 exceeded. SKIPPING."); 1690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel continue; 1700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1726d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel long startTime = 0; 1736d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel long endTime = 0; 1740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel for (int j = 0; j < testReps; j++) { 1766d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel startTime = System.nanoTime(); 1770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize(html); 1786d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel endTime = System.nanoTime(); 1790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1806d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel System.out.println( 1816d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel " Took " + ((endTime - startTime) / 1000000) + " ms"); 1820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel totalTime = totalTime + (endTime - startTime); 1830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel averageTime = totalTime / testReps; 1860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1886d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel System.out.println("Total time ms: " + totalTime/1000000L); 1896d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel System.out.println("Average time per rep ms: " + averageTime/1000000L); 1900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 1910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 1920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 1930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * Test basic XSS cases. 1940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 195afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 1960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testScriptAttacks() throws Exception { 1970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("test<script>alert(document.cookie)</script>", "script"); 1980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("test<script>alert(document.cookie)</script>", "script"); 1990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 2000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<<<><<script src=http://fake-evil.ru/test.js>", "<script"); 2010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<<<><<script src=http://fake-evil.ru/test.js>", "<script"); 2020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 2030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<script<script src=http://fake-evil.ru/test.js>>", "<script"); 2040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<script<script src=http://fake-evil.ru/test.js>>", "<script"); 205afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 2070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 208afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", "onload"); 2100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", "onload"); 211afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BODY ONLOAD=alert('XSS')>", "alert"); 2130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BODY ONLOAD=alert('XSS')>", "alert"); 214afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<iframe src=http://ha.ckers.org/scriptlet.html <", "<iframe"); 2160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<iframe src=http://ha.ckers.org/scriptlet.html <", "<iframe"); 217afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">", "src"); 2190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">", "src"); 220afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<a onblur=\"alert(secret)\" href=\"http://www.google.com\">Google</a>", "alert"); 2220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<a onblur=\"alert(secret)\" href=\"http://www.google.com\">Google</a>", "alert"); 2230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 224afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testImgAttacks() throws Exception { 2260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain("<img src=\"http://www.myspace.com/img.gif\"/>", "<img"); 2270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain("<img src=\"http://www.myspace.com/img.gif\"/>", "<img"); 228afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<img src=javascript:alert(document.cookie)>", "<img"); 230afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC=javascript:alert('XSS')>", "<img"); 2320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC=javascript:alert('XSS')>", "<img"); 233afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC='javascript:alert('XSS')'>", "src"); 2350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC='javascript:alert('XSS')'>", "src"); 236afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC=\"jav
ascript:alert('XSS');\">", "alert"); 2380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC=\"jav
ascript:alert('XSS');\">", "alert"); 239afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<IMG SRC=javascript:alert('XSS')>"; 2410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (sanitize(s).length() != 0) { 2420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(s, "&"); 2430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 2440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<IMG SRC=javascript:alert('XSS')>"; 2450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (sanitize(s).length() != 0) { 2460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(s, "&"); 2470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 248afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize("<IMG SRC=javascript:alert('XSS')>"); 2500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize("<IMG SRC=javascript:alert('XSS')>"); 251afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC=\"javascript:alert('XSS')\"", "javascript"); 2530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC=\"javascript:alert('XSS')\"", "javascript"); 254afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG LOWSRC=\"javascript:alert('XSS')\">", "javascript"); 2560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG LOWSRC=\"javascript:alert('XSS')\">", "javascript"); 257afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BGSOUND SRC=\"javascript:alert('XSS');\">", "javascript"); 2590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BGSOUND SRC=\"javascript:alert('XSS');\">", "javascript"); 2600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 261afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testHrefAttacks() throws Exception { 2630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">", "href"); 2640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">", "href"); 265afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">", "href"); 2670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">", "href"); 268afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>", "ha.ckers.org"); 2700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>", "ha.ckers.org"); 271afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>", "ha.ckers.org"); 2730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>", "ha.ckers.org"); 274afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>li {list-style-image: url(\"javascript:alert('XSS')\");}</STYLE><UL><LI>XSS", "javascript"); 2760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>li {list-style-image: url(\"javascript:alert('XSS')\");}</STYLE><UL><LI>XSS", "javascript"); 277afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC='vbscript:msgbox(\"XSS\")'>", "vbscript"); 2790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG SRC='vbscript:msgbox(\"XSS\")'>", "vbscript"); 280afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">", "<meta"); 2820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">", "<meta"); 283afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">", "<meta"); 2850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">", "<meta"); 286afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K\">", "<meta"); 2880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K\">", "<meta"); 289afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IFRAME SRC=\"javascript:alert('XSS');\"></IFRAME>", "iframe"); 2910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IFRAME SRC=\"javascript:alert('XSS');\"></IFRAME>", "iframe"); 292afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<FRAMESET><FRAME SRC=\"javascript:alert('XSS');\"></FRAMESET>", "javascript"); 2940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<FRAMESET><FRAME SRC=\"javascript:alert('XSS');\"></FRAMESET>", "javascript"); 295afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<TABLE BACKGROUND=\"javascript:alert('XSS')\">", "background"); 2970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<TABLE BACKGROUND=\"javascript:alert('XSS')\">", "background"); 298afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 2990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">", "background"); 3000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">", "background"); 301afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">", "javascript"); 3030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">", "javascript"); 304afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<DIV STYLE=\"width: expression(alert('XSS'));\">", "alert"); 3060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<DIV STYLE=\"width: expression(alert('XSS'));\">", "alert"); 307afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">", "alert"); 3090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">", "alert"); 310afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>@im\\port'\\ja\\vasc\\ript:alert(\"XSS\")';</STYLE>", "ript:alert"); 3120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<STYLE>@im\\port'\\ja\\vasc\\ript:alert(\"XSS\")';</STYLE>", "ript:alert"); 313afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BASE HREF=\"javascript:alert('XSS');//\">", "javascript"); 3150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BASE HREF=\"javascript:alert('XSS');//\">", "javascript"); 316afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BaSe hReF=\"http://arbitrary.com/\">", "<base"); 3180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<BaSe hReF=\"http://arbitrary.com/\">", "<base"); 319afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<OBJECT TYPE=\"text/x-scriptlet\" DATA=\"http://ha.ckers.org/scriptlet.html\"></OBJECT>", "<object"); 3210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<OBJECT TYPE=\"text/x-scriptlet\" DATA=\"http://ha.ckers.org/scriptlet.html\"></OBJECT>", "<object"); 322afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<OBJECT classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></OBJECT>", "javascript"); 324afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<OBJECT classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></OBJECT>", "javascript"); 326afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<EMBED SRC=\"http://ha.ckers.org/xss.swf\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); 3280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<EMBED SRC=\"http://ha.ckers.org/xss.swf\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); 329afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<EMBED SRC=\"data:image/svg+xml;base64,PHN2ZyB4bWxuczpzdmc9Imh0dH A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); 3310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<EMBED SRC=\"data:image/svg+xml;base64,PHN2ZyB4bWxuczpzdmc9Imh0dH A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); 332afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=\">\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 3340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=\">\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 335afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=\">\" '' SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 3370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=\">\" '' SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 338afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=`>` SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 3400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=`>` SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 341afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=\">'>\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 3430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT a=\">'>\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); 344afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT>document.write(\"<SCRI\");</SCRIPT>PT SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "script"); 3460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT>document.write(\"<SCRI\");</SCRIPT>PT SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "script"); 347afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT SRC=http://ha.ckers.org/xss.js", "<script"); 3490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<SCRIPT SRC=http://ha.ckers.org/xss.js", "<script"); 350afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<div/style=\-\mo\z\-b\i\nd\in\g:\url(//business\i\nfo.co.uk\/labs\/xbl\/xbl\.xml\#xss)&>", "style"); 3520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<div/style=\-\mo\z\-b\i\nd\in\g:\url(//business\i\nfo.co.uk\/labs\/xbl\/xbl\.xml\#xss)&>", "style"); 353afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<a href='aim: &c:\\windows\\system32\\calc.exe' ini='C:\\Documents and Settings\\All Users\\Start Menu\\Programs\\Startup\\pwnd.bat'>", "aim.exe"); 3550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<a href='aim: &c:\\windows\\system32\\calc.exe' ini='C:\\Documents and Settings\\All Users\\Start Menu\\Programs\\Startup\\pwnd.bat'>", "aim.exe"); 356afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<!--\n<A href=\n- --><a href=javascript:alert:document.domain>test-->", "javascript"); 3580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<!--\n<A href=\n- --><a href=javascript:alert:document.domain>test-->", "javascript"); 359afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<a></a style=\"\"xx:expr/**/ession(document.appendChild(document.createElement('script')).src='http://h4k.in/i.js')\">", "document"); 3610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<a></a style=\"\"xx:expr/**/ession(document.appendChild(document.createElement('script')).src='http://h4k.in/i.js')\">", "document"); 3620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 363afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 3650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * Test CSS protections. 3660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 367afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testCssAttacks() throws Exception { 369afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<div style=\"position:absolute\">", "position"); 3710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<div style=\"position:absolute\">", "position"); 372afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<style>b { position:absolute }</style>", "position"); 3740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<style>b { position:absolute }</style>", "position"); 375afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<div style=\"z-index:25\">test</div>", "z-index"); 3770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<div style=\"z-index:25\">test</div>", "z-index"); 378afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<style>z-index:25</style>", "z-index"); 3800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<style>z-index:25</style>", "z-index"); 3810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 382afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 3840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * Test a bunch of strings that have tweaked the XML parsing capabilities of 3850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * NekoHTML. 3860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 3870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testIllegalXML() throws Exception { 3880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel for (int i = 0; i < BASE64_BAD_XML_STRINGS.length; i++) { 389489a0ec7301a86af8497d24748336db09ca278damikesamuel String testStr = new String( 390489a0ec7301a86af8497d24748336db09ca278damikesamuel Base64.decodeBase64(BASE64_BAD_XML_STRINGS[i]), 391489a0ec7301a86af8497d24748336db09ca278damikesamuel "UTF-8"); 3924e867904c8295537803c1c8a076e130df5674b58mikesamuel sanitize(testStr); 3934e867904c8295537803c1c8a076e130df5674b58mikesamuel sanitize(testStr); 3940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 395afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 3960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // These fail in AntiSamy due to a bug in NekoHTML 3970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals( 3980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "<a href=\"http://www.test.com\" rel=\"nofollow\"></a>", 3990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize("<a . href=\"http://www.test.com\">")); 4000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals( 4010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "<a href=\"http://www.test.com\" rel=\"nofollow\"></a>", 4020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize("<a - href=\"http://www.test.com\">")); 403afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 4040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertTrue(sanitize("<style>") != null); 4050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 406afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 4070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testPreviousBugs() throws Exception { 408afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 4090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 4100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * issues 12 (and 36, which was similar). empty tags cause display 4110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * problems/"formjacking" 4120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 413afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 4140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 4150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel Pattern p = Pattern.compile(".*<strong(\\s*)/>.*", Pattern.DOTALL); 4160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s1 = sanitize("<br ><strong></strong><a>hello world</a><b /><i/><hr>"); 4170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s2 = sanitize("<br ><strong></strong><a>hello world</a><b /><i/><hr>"); 418afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 4190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertFalse(p.matcher(s1).matches()); 420afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 4210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel p = Pattern.compile(".*<b(\\s*)/>.*"); 4220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertFalse(p.matcher(s1).matches()); 4230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertFalse(p.matcher(s2).matches()); 424afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com 4250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel p = Pattern.compile(".*<i(\\s*)/>.*"); 4260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertFalse(p.matcher(s1).matches()); 4270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertFalse(p.matcher(s2).matches()); 4280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel p = Pattern.compile(".*<hr(\\s*)/>.*"); 4300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertFalse(p.matcher(s1).matches()); 4310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertFalse(p.matcher(s2).matches()); 4320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 4330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #20 */ 4350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<b><i>Some Text</b></i>", "<i />"); 4360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("<b><i>Some Text</b></i>", "<i />"); 4380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #25 */ 4410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals( 4420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "<div>Test</div>", sanitize("<div style=\"margin: -5em\">Test</div>")); 4430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #28 */ 4464e867904c8295537803c1c8a076e130df5674b58mikesamuel assertSanitizedDoesContain( 4474e867904c8295537803c1c8a076e130df5674b58mikesamuel "<div style=\"font-family: Geneva, Arial, courier new, sans-serif\">Test</div>", 448b268f8745b09a77af2e8c77ffd376b6459bf4fecmikesamuel "font-family:'geneva' , 'arial' , 'courier new' , sans-serif"); 4490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #29 - missing quotes around properties with spaces */ 4510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 4520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<style type=\"text/css\"><![CDATA[P {\n font-family: \"Arial Unicode MS\";\n}\n]]></style>"; 4530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(s, sanitize(s)); 4540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 4550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #30 */ 4580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 4590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<style type=\"text/css\"><![CDATA[P { margin-bottom: 0.08in; } ]]></style>"; 4600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = sanitize(s); 4620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // followup - does the patch fix multiline CSS? 4640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s2 = "<style type=\"text/css\"><![CDATA[\r\nP {\r\n margin-bottom: 0.08in;\r\n}\r\n]]></style>"; 4650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<style type=\"text/css\"><![CDATA[P {\n\tmargin-bottom: 0.08in;\n}\n]]></style>", sanitize(s2)); 4660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // next followup - does non-CDATA parsing still work? 4680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s3 = "<style>P {\n\tmargin-bottom: 0.08in;\n}\n"; 4700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<style>P {\n\tmargin-bottom: 0.08in;\n}\n</style>\n", sanitize(s3)); 4710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // for other 4730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // tests 4740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 4750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #32 - nekos problem */ 4770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 4780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<SCRIPT =\">\" SRC=\"\"></SCRIPT>"; 4790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize(s); 4800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize(s); 4810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 4820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #37 - OOM */ 4840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 4850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String dirty = "<a onblur=\"try {parent.deselectBloggerImageGracefully();}" + "catch(e) {}\"" 4860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel + "href=\"http://www.charityadvantage.com/ChildrensmuseumEaston/images/BookswithBill.jpg\"><img" + "style=\"FLOAT: right; MARGIN: 0px 0px 10px 10px; WIDTH: 150px; CURSOR:" 4870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel + "hand; HEIGHT: 100px\" alt=\"\"" + "src=\"http://www.charityadvantage.com/ChildrensmuseumEaston/images/BookswithBill.jpg\"" 4880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel + "border=\"0\" /></a><br />Poor Bill, couldn't make it to the Museum's <span" + "class=\"blsp-spelling-corrected\" id=\"SPELLING_ERROR_0\">story time</span>" 4890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel + "today, he was so busy shoveling! Well, we sure missed you Bill! So since" + "ou were busy moving snow we read books about snow. We found a clue in one" 4900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel + "book which revealed a snowplow at the end of the story - we wish it had" + "driven to your driveway Bill. We also read a story which shared fourteen" 4910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel + "<em>Names For Snow. </em>We'll catch up with you next week....wonder which" + "hat Bill will wear?<br />Jane"; 4920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = sanitize(dirty); 4940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertNotNull(s); 4950f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 4960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 4970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #38 - color problem/color combinations */ 4980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 4990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<font color=\"#fff\">Test</font>"; 5000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String expected = "<font color=\"#fff\">Test</font>"; 5010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5044e867904c8295537803c1c8a076e130df5674b58mikesamuel s = "<div style=\"color: #fff\">Test 3 letter code</div>"; 505c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel expected = "<div style=\"color:#fff\">Test 3 letter code</div>"; 5064e867904c8295537803c1c8a076e130df5674b58mikesamuel assertEquals(expected, sanitize(s)); 5074e867904c8295537803c1c8a076e130df5674b58mikesamuel assertEquals(expected, sanitize(s)); 5080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<font color=\"red\">Test</font>"; 5100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel expected = "<font color=\"red\">Test</font>"; 5110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5144e867904c8295537803c1c8a076e130df5674b58mikesamuel s = "<font color=\"neonpink\">Test</font>"; 5154e867904c8295537803c1c8a076e130df5674b58mikesamuel expected = s; 5164e867904c8295537803c1c8a076e130df5674b58mikesamuel assertEquals(expected, sanitize(s)); 5174e867904c8295537803c1c8a076e130df5674b58mikesamuel assertEquals(expected, sanitize(s)); 5180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 5200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<font color=\"#0000\">Test</font>"; 5210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel expected = "<font>Test</font>"; 5220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 5250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 5270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<div style=\"color: #0000\">Test</div>"; 5284e867904c8295537803c1c8a076e130df5674b58mikesamuel expected = "<div>Test</div>"; 5290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 5320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<font color=\"#000000\">Test</font>"; 5340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel expected = "<font color=\"#000000\">Test</font>"; 5350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, sanitize(s)); 5370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5384e867904c8295537803c1c8a076e130df5674b58mikesamuel s = "<div style=\"color: #000000\">Test</div>"; 539c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel expected = "<div style=\"color:#000000\">Test</div>"; 5404e867904c8295537803c1c8a076e130df5674b58mikesamuel assertEquals(expected, sanitize(s)); 5414e867904c8295537803c1c8a076e130df5674b58mikesamuel assertEquals(expected, sanitize(s)); 5420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<b><u>foo<style><script>alert(1)</script></style>@import 'x';</u>bar"; 5440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sanitize(s); 5450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 5460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #40 - handling <style> media attributes right */ 5480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 5500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain("<style media=\"print, projection, screen\"> P { margin: 1em; }</style>", "print, projection, screen"); 5510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 5520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #41 - comment handling */ 5540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 5560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("text ", sanitize("text <!-- comment -->")); 5570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("text ", sanitize("text <!-- comment -->")); 5580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text </div>", sanitize("<div>text <!-- comment --></div>")); 5610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text </div>", sanitize("<div>text <!-- comment --></div>")); 5620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text </div>", sanitize("<div>text <!--[if IE]> comment <[endif]--></div>")); 5640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text </div>", sanitize("<div>text <!--[if IE]> comment <[endif]--></div>")); 5650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 5670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * Check to see how nested conditional comments are handled. This is 5680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * not very clean but the main goal is to avoid any tags. Not sure 5690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * on encodings allowed in comments. 5700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 5710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String input = "<div>text <!--[if IE]> <!--[if gte 6]> comment <[endif]--><[endif]--></div>"; 5720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String expected = "<div>text <[endif]--></div>"; 5730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String output = sanitize(input); 5740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, output); 5750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel input = "<div>text <!--[if IE]> <!--[if gte 6]> comment <[endif]--><[endif]--></div>"; 5770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel expected = "<div>text <[endif]--></div>"; 5780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel output = sanitize(input); 5790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(expected, output); 5800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 5820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * Regular comment nested inside conditional comment. Test makes 5830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * sure 5840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 5850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text comment <[endif]--></div>", sanitize("<div>text <!--[if IE]> <!-- IE specific --> comment <[endif]--></div>")); 5860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 5880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * These play with whitespace and have invalid comment syntax. 5890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 5900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text </div>", sanitize("<div>text <!-- [ if lte 6 ]>\ncomment <[ endif\n]--></div>")); 5910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text comment </div>", sanitize("<div>text <![if !IE]> comment <![endif]></div>")); 5920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("<div>text comment </div>", sanitize("<div>text <![ if !IE]> comment <![endif]></div>")); 5930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String attack = "[if lte 8]<script>"; 5950f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String spacer = "<![if IE]>"; 5960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel StringBuffer sb = new StringBuffer(); 5980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 5990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sb.append("<div>text<!"); 6000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel for (int i = 0; i < attack.length(); i++) { 6020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sb.append(attack.charAt(i)); 6030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sb.append(spacer); 6040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel sb.append("<![endif]>"); 6070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = sb.toString(); 6090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "<script"); 6110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "<script"); 6120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 6150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * issue #44 - childless nodes of non-allowed elements won't cause an error 6160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 6170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 6180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<iframe src='http://foo.com/'></iframe>" + "<script src=''></script>" + "<link href='/foo.css'>"; 6190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(s, "", sanitize(s)); 6200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #51 - offsite urls with () are found to be invalid */ 6230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain( 6240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "<a href='http://subdomain.domain/(S(ke0lpq54bw0fvp53a10e1a45))/MyPage.aspx'>test</a>", "("); 6250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #56 - unnecessary spaces */ 6274e867904c8295537803c1c8a076e130df5674b58mikesamuel { 6280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<SPAN style='font-weight: bold;'>Hello World!</SPAN>"; 6294e867904c8295537803c1c8a076e130df5674b58mikesamuel assertEquals( 630c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel "<span style=\"font-weight:bold\">Hello World!</span>", 6314e867904c8295537803c1c8a076e130df5674b58mikesamuel sanitize(s)); 6320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #58 - input not in list of allowed-to-be-empty tags */ 6350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 6360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "tgdan <input/> g h"; 6370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals("tgdan g h", sanitize(s)); 6380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #61 - input has newline appended if ends with an accepted tag */ 6420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 6430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String dirtyInput = "blah <b>blah</b>."; 6440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = sanitize(dirtyInput); 6450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(dirtyInput, s); 6460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* issue #69 - char attribute should allow single char or entity ref */ 6490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6504e867904c8295537803c1c8a076e130df5674b58mikesamuel { 6510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String s = "<td char='.'>test</td>"; 6520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(s, "char"); 6530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(s, "char"); 6540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<td char='..'>test</td>"; 6560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "char"); 6570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "char"); 6580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<td char='"'>test</td>"; 6600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(s, "char"); 6610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(s, "char"); 6620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<td char='"a'>test</td>"; 6640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "char"); 6650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "char"); 6660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel s = "<td char='"&'>test</td>"; 6680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "char"); 6690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(s, "char"); 6700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* privately disclosed issue - cdata bypass */ 6730f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 6740f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String malInput = "<![CDATA[]><script>alert(1)</script>]]>"; 6750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(malInput, "<script"); 6770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* this test is for confirming literal-lists work as 6800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * advertised. it turned out to be an invalid / non- 6810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * reproducible bug report but the test seemed useful 6820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * enough to keep. 6830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 6840f3a7565157c70edb1935f04888fdc0407397fabmikesamuel { 6850f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String malInput = "hello<p align='invalid'>world</p>"; 6860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain(malInput, "invalid"); 6870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String goodInput = "hello<p align='left'>world</p>"; 6890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 6900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(goodInput, "left"); 6910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 6940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 6950f3a7565157c70edb1935f04888fdc0407397fabmikesamuel /* 6960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * Tests cases dealing with nofollowAnchors directive. Assumes anchor tags 6970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel * have an action set to "validate" (may be implicit) in the policy file. 6980f3a7565157c70edb1935f04888fdc0407397fabmikesamuel */ 6990f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testNofollowAnchors() throws Exception { 7000f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // adds when not present 7010f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized("<a href=\"blah\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); 7020f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7030f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // adds properly even with bad attr 7040f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized("<a href=\"blah\" bad=\"true\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); 7050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7060f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // rel with bad value gets corrected 7070f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized("<a href=\"blah\" rel=\"blh\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); 7080f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7090f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // correct attribute doesnt get messed with 7100f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized("<a href=\"blah\" rel=\"nofollow\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); 7110f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7120f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // if two correct attributes, only one remaining after scan 7130f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized("<a href=\"blah\" rel=\"nofollow\" rel=\"nofollow\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); 7140f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7150f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // test if value is off - does it add? 7160f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesNotContain("a href=\"blah\">link</a>", "nofollow"); 7170f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7180f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7190f3a7565157c70edb1935f04888fdc0407397fabmikesamuel public void testValidateParamAsEmbed() throws Exception { 7200f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // let's start with a YouTube embed 7210f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; 7220f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" /><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" width=\"560\" /></object>"; 7230f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 7240f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(input, expectedOutput); 7250f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } else { 7260f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized(input, ""); 7270f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7280f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7290f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; 7300f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 7310f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(input, saxExpectedOutput); 7320f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } else { 7330f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized(input, ""); 7340f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7350f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7360f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // now what if someone sticks malicious URL in the value of the 7370f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // value attribute in the param tag? remove that param tag 7380f3a7565157c70edb1935f04888fdc0407397fabmikesamuel input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://supermaliciouscode.com/badstuff.swf\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; 7390f3a7565157c70edb1935f04888fdc0407397fabmikesamuel expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" width=\"560\" /></object>"; 7400f3a7565157c70edb1935f04888fdc0407397fabmikesamuel saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; 7410f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 7420f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(input, expectedOutput); 7430f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } else { 7440f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized(input, ""); 7450f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7460f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7470f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 7480f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertTrue(sanitize(input).equals(saxExpectedOutput)); 7490f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } else { 7500f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized(input, ""); 7510f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7520f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7530f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // now what if someone sticks malicious URL in the value of the src 7540f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // attribute in the embed tag? remove that embed tag 7550f3a7565157c70edb1935f04888fdc0407397fabmikesamuel input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://hereswhereikeepbadcode.com/ohnoscary.swf\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; 7560f3a7565157c70edb1935f04888fdc0407397fabmikesamuel expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" /><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /></object>"; 7570f3a7565157c70edb1935f04888fdc0407397fabmikesamuel saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"></object>"; 7580f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7590f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 7600f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(input, expectedOutput); 7610f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } else { 7620f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized(input, ""); 7630f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7640f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7650f3a7565157c70edb1935f04888fdc0407397fabmikesamuel if (RUN_KNOWN_FAILURES) { 7660f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitizedDoesContain(input, saxExpectedOutput); 7670f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } else { 7680f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertSanitized(input, ""); 7690f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7700f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7710f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 7720f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 773be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel private static void assertSanitizedDoesNotContain( 7744e867904c8295537803c1c8a076e130df5674b58mikesamuel String html, String dangerousContent) { 7750f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String sanitized = sanitize(html); 7760f3a7565157c70edb1935f04888fdc0407397fabmikesamuel int index = Strings.toLowerCase(sanitized).indexOf( 7770f3a7565157c70edb1935f04888fdc0407397fabmikesamuel Strings.toLowerCase(dangerousContent)); 7780f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals( 7790f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "`" + sanitized + "` from `" + html + "` contains `" + 7800f3a7565157c70edb1935f04888fdc0407397fabmikesamuel dangerousContent + "`", 7810f3a7565157c70edb1935f04888fdc0407397fabmikesamuel -1, index); 7820f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7830f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 784be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel private static void assertSanitizedDoesContain( 7854e867904c8295537803c1c8a076e130df5674b58mikesamuel String html, String dangerousContent) { 7860f3a7565157c70edb1935f04888fdc0407397fabmikesamuel String sanitized = sanitize(html); 7870f3a7565157c70edb1935f04888fdc0407397fabmikesamuel int index = Strings.toLowerCase(sanitized).indexOf( 7880f3a7565157c70edb1935f04888fdc0407397fabmikesamuel Strings.toLowerCase(dangerousContent)); 7890f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertTrue( 7900f3a7565157c70edb1935f04888fdc0407397fabmikesamuel "`" + sanitized + "` from `" + html + "` does not contain `" + 7910f3a7565157c70edb1935f04888fdc0407397fabmikesamuel dangerousContent + "`", 7920f3a7565157c70edb1935f04888fdc0407397fabmikesamuel index >= 0); 7930f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 7940f3a7565157c70edb1935f04888fdc0407397fabmikesamuel 795be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel private static void assertSanitized(String html, String sanitized) { 7960f3a7565157c70edb1935f04888fdc0407397fabmikesamuel assertEquals(sanitized, sanitize(html)); 7970f3a7565157c70edb1935f04888fdc0407397fabmikesamuel } 798d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel 799d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel private static String guessCharsetFromContentType(String contentType) { 800d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel Matcher m = Pattern.compile(";\\s*charset=(?:\"([^\"]*)\"|([^\\s;]*))") 801d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel .matcher(contentType); 802d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel if (m.find()) { 803d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel String ct; 804d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel ct = m.group(1); 805d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel if (ct != null) { return ct; } 806d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel ct = m.group(2); 807d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel if (ct != null) { return ct; } 808d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel } 809d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel return "UTF-8"; 810d9475f7aaf4b2d9f95e815869680fff9a0474fd6mikesamuel } 811afd4893acdc6af7f4e1e812dbd6ffcff58743d87manico.james@gmail.com}