// Copyright (c) 2012, Mike Samuel // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // // Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // Neither the name of the OWASP nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. package org.owasp.html; import org.junit.Test; import junit.framework.TestCase; public class EncodingTest extends TestCase { @Test public static final void testDecodeHtml() { String html = "The quick brown fox jumps over the lazy dog "; // 1 2 3 4 5 6 // 123456789012345678901234567890123456789012345678901234567890123456789 String golden = "The quick\u00a0brown fox\njumps over\r\nthe lazy dog\n"; assertEquals(golden, Encoding.decodeHtml(html)); // Don't allocate a new string when no entities. assertSame(golden, Encoding.decodeHtml(golden)); // test interrupted escapes and escapes at end of file handled gracefully assertEquals( "\\\\u000a", Encoding.decodeHtml("\\\\u000a")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( String.valueOf(Character.toChars(0x10000)), Encoding.decodeHtml("𐀀")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "�ziggy", Encoding.decodeHtml("�ziggy")); assertEquals( "਀z;", Encoding.decodeHtml("਀z;")); assertEquals( "&#\n", Encoding.decodeHtml("&# ")); assertEquals( "&#x\n", Encoding.decodeHtml("&#x ")); assertEquals( "\n\n", Encoding.decodeHtml(" ")); assertEquals( "&#\n", Encoding.decodeHtml("&# ")); assertEquals( "&#x", Encoding.decodeHtml("&#x")); assertEquals( "", // NUL elided. Encoding.decodeHtml("�")); assertEquals( "&#", Encoding.decodeHtml("&#")); assertEquals( "\\", Encoding.decodeHtml("\\")); assertEquals( "&", Encoding.decodeHtml("&")); assertEquals( "�a;", Encoding.decodeHtml("�a;")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "\t", Encoding.decodeHtml(" ")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "�ziggy", Encoding.decodeHtml("�ziggy")); assertEquals( "&#\n", Encoding.decodeHtml("&# ")); assertEquals( "\n", Encoding.decodeHtml("� ")); assertEquals( "\n", Encoding.decodeHtml(" ")); assertEquals( "&#\n", Encoding.decodeHtml("&# ")); assertEquals( "", // Invalid XML char elided. Encoding.decodeHtml("")); assertEquals( "\t", Encoding.decodeHtml(" ")); assertEquals( "\n", Encoding.decodeHtml(" ")); // test the named escapes assertEquals( "<", Encoding.decodeHtml("<")); assertEquals( ">", Encoding.decodeHtml(">")); assertEquals( "\"", Encoding.decodeHtml(""")); assertEquals( "'", Encoding.decodeHtml("'")); assertEquals( "'", Encoding.decodeHtml("'")); assertEquals( "'", Encoding.decodeHtml("'")); assertEquals( "&", Encoding.decodeHtml("&")); assertEquals( "<", Encoding.decodeHtml("&lt;")); assertEquals( "&", Encoding.decodeHtml("&")); assertEquals( "&", Encoding.decodeHtml("&")); assertEquals( "&", Encoding.decodeHtml("&AmP;")); assertEquals( "\u0391", Encoding.decodeHtml("Α")); assertEquals( "\u03b1", Encoding.decodeHtml("α")); assertEquals( "&;", Encoding.decodeHtml("&;")); assertEquals( "&bogus;", Encoding.decodeHtml("&bogus;")); } @Test public static final void testAppendNumericEntityAndEncodeOnto() throws Exception { StringBuilder sb = new StringBuilder(); StringBuilder cps = new StringBuilder(); for (int codepoint : new int[] { 0, 9, '\n', '@', 0x80, 0xff, 0x100, 0xfff, 0x1000, 0x123a, 0xffff, 0x10000, Character.MAX_CODE_POINT }) { Encoding.appendNumericEntity(codepoint, sb); sb.append(' '); cps.appendCodePoint(codepoint).append(' '); } assertEquals( "� @ € ÿ Ā ࿿ က " + "ሺ ￿ 𐀀 􏿿 ", sb.toString()); StringBuilder out = new StringBuilder(); Encoding.encodeHtmlOnto(cps.toString(), out); assertEquals( " \t \n @ \u0080 \u00ff \u0100 \u0fff \u1000 " + "\u123a 𐀀 􏿿 ", out.toString()); } private static final void assertStripped(String stripped, String orig) { String actual = Encoding.stripBannedCodeunits(orig); assertEquals(orig, stripped, actual); if (stripped.equals(orig)) { assertSame(actual, orig); } StringBuilder sb = new StringBuilder(orig); Encoding.stripBannedCodeunits(sb); assertEquals(orig, stripped, sb.toString()); } @Test public static final void testStripBannedCodeunits() { assertStripped("", ""); assertStripped("foo", "foo"); assertStripped("foobar", "foo\u0000bar"); assertStripped("foobar", "foo\u0000bar\u0000"); assertStripped("foobar", "foo\ufffebar\u0008"); assertStripped("foobar", "foo\ud800bar\udc00"); assertStripped("foo\ud800\udc00bar", "foo\ud800\ud800\udc00bar"); assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\ud800bar"); assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\udc00bar"); assertStripped("foo\ud800\udc00bar", "foo\udc00\ud800\udc00bar"); assertStripped("foo\ud834\udd1ebar", "foo\ud834\udd1ebar"); assertStripped("foo\ud834\udd1e", "foo\ud834\udd1e"); assertStripped("\uffef\ufffd", "\uffef\ufffd\ufffe\uffff"); } }