1// Copyright (c) 2012, Mike Samuel 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions 6// are met: 7// 8// Redistributions of source code must retain the above copyright 9// notice, this list of conditions and the following disclaimer. 10// Redistributions in binary form must reproduce the above copyright 11// notice, this list of conditions and the following disclaimer in the 12// documentation and/or other materials provided with the distribution. 13// Neither the name of the OWASP nor the names of its contributors may 14// be used to endorse or promote products derived from this software 15// without specific prior written permission. 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27// POSSIBILITY OF SUCH DAMAGE. 28 29package org.owasp.html; 30 31import org.junit.Test; 32 33import junit.framework.TestCase; 34 35public class EncodingTest extends TestCase { 36 37 @Test 38 public static final void testDecodeHtml() { 39 String html = 40 "The quick brown fox
jumps over
 the lazy dog
"; 41 // 1 2 3 4 5 6 42 // 123456789012345678901234567890123456789012345678901234567890123456789 43 String golden = 44 "The quick\u00a0brown fox\njumps over\r\nthe lazy dog\n"; 45 assertEquals(golden, Encoding.decodeHtml(html)); 46 47 // Don't allocate a new string when no entities. 48 assertSame(golden, Encoding.decodeHtml(golden)); 49 50 // test interrupted escapes and escapes at end of file handled gracefully 51 assertEquals( 52 "\\\\u000a", 53 Encoding.decodeHtml("\\\\u000a")); 54 assertEquals( 55 "\n", 56 Encoding.decodeHtml("
")); 57 assertEquals( 58 "\n", 59 Encoding.decodeHtml("
")); 60 assertEquals( 61 "\n", 62 Encoding.decodeHtml("
")); 63 assertEquals( 64 "\n", 65 Encoding.decodeHtml("
")); 66 assertEquals( 67 String.valueOf(Character.toChars(0x10000)), 68 Encoding.decodeHtml("𐀀")); 69 assertEquals( 70 "\n", 71 Encoding.decodeHtml("
")); 72 assertEquals( 73 "�ziggy", 74 Encoding.decodeHtml("�ziggy")); 75 assertEquals( 76 "਀z;", 77 Encoding.decodeHtml("਀z;")); 78 assertEquals( 79 "&#\n", 80 Encoding.decodeHtml("&#
")); 81 assertEquals( 82 "&#x\n", 83 Encoding.decodeHtml("&#x
")); 84 assertEquals( 85 "\n\n", 86 Encoding.decodeHtml("

")); 87 assertEquals( 88 "&#\n", 89 Encoding.decodeHtml("&#
")); 90 assertEquals( 91 "&#x", 92 Encoding.decodeHtml("&#x")); 93 assertEquals( 94 "", // NUL elided. 95 Encoding.decodeHtml("�")); 96 assertEquals( 97 "&#", 98 Encoding.decodeHtml("&#")); 99 100 assertEquals( 101 "\\", 102 Encoding.decodeHtml("\\")); 103 assertEquals( 104 "&", 105 Encoding.decodeHtml("&")); 106 107 assertEquals( 108 "�a;", 109 Encoding.decodeHtml("�a;")); 110 assertEquals( 111 "\n", 112 Encoding.decodeHtml(" ")); 113 assertEquals( 114 "\n", 115 Encoding.decodeHtml("
")); 116 assertEquals( 117 "\n", 118 Encoding.decodeHtml("
")); 119 assertEquals( 120 "\t", 121 Encoding.decodeHtml("	")); 122 assertEquals( 123 "\n", 124 Encoding.decodeHtml("
")); 125 assertEquals( 126 "�ziggy", 127 Encoding.decodeHtml("�ziggy")); 128 assertEquals( 129 "&#\n", 130 Encoding.decodeHtml("&#
")); 131 assertEquals( 132 "\n", 133 Encoding.decodeHtml("�
")); 134 assertEquals( 135 "\n", 136 Encoding.decodeHtml(" ")); 137 assertEquals( 138 "&#\n", 139 Encoding.decodeHtml("&# ")); 140 assertEquals( 141 "", // Invalid XML char elided. 142 Encoding.decodeHtml("")); 143 assertEquals( 144 "\t", 145 Encoding.decodeHtml("	")); 146 assertEquals( 147 "\n", 148 Encoding.decodeHtml("
")); 149 150 // test the named escapes 151 assertEquals( 152 "<", 153 Encoding.decodeHtml("<")); 154 assertEquals( 155 ">", 156 Encoding.decodeHtml(">")); 157 assertEquals( 158 "\"", 159 Encoding.decodeHtml(""")); 160 assertEquals( 161 "'", 162 Encoding.decodeHtml("'")); 163 assertEquals( 164 "'", 165 Encoding.decodeHtml("'")); 166 assertEquals( 167 "'", 168 Encoding.decodeHtml("'")); 169 assertEquals( 170 "&", 171 Encoding.decodeHtml("&")); 172 assertEquals( 173 "<", 174 Encoding.decodeHtml("&lt;")); 175 assertEquals( 176 "&", 177 Encoding.decodeHtml("&")); 178 assertEquals( 179 "&", 180 Encoding.decodeHtml("&")); 181 assertEquals( 182 "&", 183 Encoding.decodeHtml("&AmP;")); 184 assertEquals( 185 "\u0391", 186 Encoding.decodeHtml("Α")); 187 assertEquals( 188 "\u03b1", 189 Encoding.decodeHtml("α")); 190 191 assertEquals( 192 "&;", 193 Encoding.decodeHtml("&;")); 194 assertEquals( 195 "&bogus;", 196 Encoding.decodeHtml("&bogus;")); 197 } 198 199 @Test 200 public static final void testAppendNumericEntityAndEncodeOnto() 201 throws Exception { 202 StringBuilder sb = new StringBuilder(); 203 StringBuilder cps = new StringBuilder(); 204 for (int codepoint : new int[] { 205 0, 9, '\n', '@', 0x80, 0xff, 0x100, 0xfff, 0x1000, 0x123a, 0xffff, 206 0x10000, Character.MAX_CODE_POINT }) { 207 Encoding.appendNumericEntity(codepoint, sb); 208 sb.append(' '); 209 210 cps.appendCodePoint(codepoint).append(' '); 211 } 212 213 assertEquals( 214 "� 	 @ € ÿ Ā ࿿ က " 215 + "ሺ  𐀀  ", 216 sb.toString()); 217 218 StringBuilder out = new StringBuilder(); 219 Encoding.encodeHtmlOnto(cps.toString(), out); 220 assertEquals( 221 " \t \n @ \u0080 \u00ff \u0100 \u0fff \u1000 " 222 + "\u123a 𐀀  ", 223 out.toString()); 224 } 225 226 private static final void assertStripped(String stripped, String orig) { 227 String actual = Encoding.stripBannedCodeunits(orig); 228 assertEquals(orig, stripped, actual); 229 if (stripped.equals(orig)) { 230 assertSame(actual, orig); 231 } 232 233 StringBuilder sb = new StringBuilder(orig); 234 Encoding.stripBannedCodeunits(sb); 235 assertEquals(orig, stripped, sb.toString()); 236 } 237 238 @Test 239 public static final void testStripBannedCodeunits() { 240 assertStripped("", ""); 241 assertStripped("foo", "foo"); 242 assertStripped("foobar", "foo\u0000bar"); 243 assertStripped("foobar", "foo\u0000bar\u0000"); 244 assertStripped("foobar", "foo\ufffebar\u0008"); 245 assertStripped("foobar", "foo\ud800bar\udc00"); 246 assertStripped("foo\ud800\udc00bar", "foo\ud800\ud800\udc00bar"); 247 assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\ud800bar"); 248 assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\udc00bar"); 249 assertStripped("foo\ud800\udc00bar", "foo\udc00\ud800\udc00bar"); 250 assertStripped("foo\ud834\udd1ebar", "foo\ud834\udd1ebar"); 251 assertStripped("foo\ud834\udd1e", "foo\ud834\udd1e"); 252 assertStripped("\uffef\ufffd", "\uffef\ufffd\ufffe\uffff"); 253 } 254} 255