10df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// Copyright (c) 2012, Mike Samuel 20df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// All rights reserved. 30df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// 40df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// Redistribution and use in source and binary forms, with or without 50df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// modification, are permitted provided that the following conditions 60df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// are met: 70df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// 80df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// Redistributions of source code must retain the above copyright 90df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// notice, this list of conditions and the following disclaimer. 100df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// Redistributions in binary form must reproduce the above copyright 110df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// notice, this list of conditions and the following disclaimer in the 120df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// documentation and/or other materials provided with the distribution. 130df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// Neither the name of the OWASP nor the names of its contributors may 140df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// be used to endorse or promote products derived from this software 150df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// without specific prior written permission. 160df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 170df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 180df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 190df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 200df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 210df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 220df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 230df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 240df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 250df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 260df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 270df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel// POSSIBILITY OF SUCH DAMAGE. 280df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 290df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuelpackage org.owasp.html; 300df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 31be666032a113a8af92bc557add8e83579cf0ef5cmikesamuelimport org.junit.Test; 32be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel 330df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuelimport junit.framework.TestCase; 340df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 350df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuelpublic class EncodingTest extends TestCase { 360df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 37be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel @Test 38be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel public static final void testDecodeHtml() { 390df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel String html = 400df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "The quick brown fox
jumps over
 the lazy dog
"; 410df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // 1 2 3 4 5 6 420df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // 123456789012345678901234567890123456789012345678901234567890123456789 430df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel String golden = 440df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "The quick\u00a0brown fox\njumps over\r\nthe lazy dog\n"; 450df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals(golden, Encoding.decodeHtml(html)); 460df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 470df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // Don't allocate a new string when no entities. 480df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertSame(golden, Encoding.decodeHtml(golden)); 490df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 500df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // test interrupted escapes and escapes at end of file handled gracefully 510df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 520df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\\\\u000a", 530df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("\\\\u000a")); 540df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 550df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 560df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 570df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 580df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 590df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 600df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 610df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 620df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 630df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 640df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 650df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 660df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 670df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel String.valueOf(Character.toChars(0x10000)), 680df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("𐀀")); 690df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 700df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 710df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 720df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 730df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "�ziggy", 740df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("�ziggy")); 750df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 760df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "਀z;", 770df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("਀z;")); 780df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 790df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&#\n", 800df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&#
")); 810df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 820df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&#x\n", 830df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&#x
")); 840df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 850df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n\n", 860df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("

")); 870df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 880df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&#\n", 890df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&#
")); 900df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 910df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&#x", 920df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&#x")); 930df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 940df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "", // NUL elided. 950df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("�")); 960df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 970df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&#", 980df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&#")); 990df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 1000df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1010df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\\", 1020df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("\\")); 1030df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1040df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&", 1050df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&")); 1060df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 1070df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1080df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "�a;", 1090df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("�a;")); 1100df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1110df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 1120df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml(" ")); 1130df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1140df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 1150df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 1160df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1170df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 1180df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 1190df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1200df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\t", 1210df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("	")); 1220df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1230df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 1240df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 1250df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1260df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "�ziggy", 1270df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("�ziggy")); 1280df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1290df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&#\n", 1300df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&#
")); 1310df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1320df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 1330df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("�
")); 1340df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1350df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 1360df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml(" ")); 1370df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1380df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&#\n", 1390df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&# ")); 1400df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1410df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "", // Invalid XML char elided. 1420df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("")); 1430df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1440df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\t", 1450df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("	")); 1460df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1470df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\n", 1480df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("
")); 1490df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 1500df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // test the named escapes 1510df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1520df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "<", 1530df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("<")); 1540df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1550df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel ">", 1560df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml(">")); 1570df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1580df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\"", 1590df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml(""")); 1600df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1610df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "'", 1620df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("'")); 1630df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1640df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "'", 1650df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("'")); 1660df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1670df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "'", 1680df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("'")); 1690df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1700df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&", 1710df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&")); 1720df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1730df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "<", 1740df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&lt;")); 1750df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1760df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&", 1770df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&")); 1780df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1790df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&", 1800df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&")); 1810df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1820df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&", 1830df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&AmP;")); 1840df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1850df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\u0391", 1860df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("Α")); 1870df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1880df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "\u03b1", 1890df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("α")); 1900df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 1910df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1920df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&;", 1930df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&;")); 1940df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 1950df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "&bogus;", 1960df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.decodeHtml("&bogus;")); 1970df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel } 1980df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 199be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel @Test 200ff252bf1803947ae2266e8ddc58ef383225be32amikesamuel public static final void testAppendNumericEntityAndEncodeOnto() 201ff252bf1803947ae2266e8ddc58ef383225be32amikesamuel throws Exception { 2020df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel StringBuilder sb = new StringBuilder(); 2030df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel StringBuilder cps = new StringBuilder(); 2040df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel for (int codepoint : new int[] { 2050df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 0, 9, '\n', '@', 0x80, 0xff, 0x100, 0xfff, 0x1000, 0x123a, 0xffff, 2060df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 0x10000, Character.MAX_CODE_POINT }) { 2070df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.appendNumericEntity(codepoint, sb); 2080df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel sb.append(' '); 2090df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 2100df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel cps.appendCodePoint(codepoint).append(' '); 2110df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel } 2120df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 2130df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 2140df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel "� 	 @ € ÿ Ā ࿿ က " 2150df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel + "ሺ  𐀀  ", 2160df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel sb.toString()); 2170df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 2180df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel StringBuilder out = new StringBuilder(); 2190df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.encodeHtmlOnto(cps.toString(), out); 2200df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals( 2210df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel " \t \n @ \u0080 \u00ff \u0100 \u0fff \u1000 " 2220df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel + "\u123a 𐀀  ", 2230df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel out.toString()); 2240df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel } 2250df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 2260df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel private static final void assertStripped(String stripped, String orig) { 2270df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel String actual = Encoding.stripBannedCodeunits(orig); 2280df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals(orig, stripped, actual); 2290df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel if (stripped.equals(orig)) { 2300df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertSame(actual, orig); 2310df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel } 2320df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 2330df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel StringBuilder sb = new StringBuilder(orig); 2340df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.stripBannedCodeunits(sb); 2350df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertEquals(orig, stripped, sb.toString()); 2360df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel } 2370df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 238be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel @Test 239be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel public static final void testStripBannedCodeunits() { 2400df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("", ""); 2410df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foo", "foo"); 2420df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foobar", "foo\u0000bar"); 2430df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foobar", "foo\u0000bar\u0000"); 2440df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foobar", "foo\ufffebar\u0008"); 2450df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foobar", "foo\ud800bar\udc00"); 2460df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foo\ud800\udc00bar", "foo\ud800\ud800\udc00bar"); 2470df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\ud800bar"); 2480df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\udc00bar"); 2490df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foo\ud800\udc00bar", "foo\udc00\ud800\udc00bar"); 2500df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foo\ud834\udd1ebar", "foo\ud834\udd1ebar"); 2510df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("foo\ud834\udd1e", "foo\ud834\udd1e"); 2520df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel assertStripped("\uffef\ufffd", "\uffef\ufffd\ufffe\uffff"); 2530df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel } 2540df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel} 255