// Copyright (c) 2012, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import org.junit.Test;

import junit.framework.TestCase;

350df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuelpublic class EncodingTest extends TestCase {
360df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
37be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  @Test
38be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  public static final void testDecodeHtml() {
390df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    String html =
400df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      "The quick brown fox
jumps over
the lazy dog
";
410df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    //          1         2         3         4         5         6
420df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    // 123456789012345678901234567890123456789012345678901234567890123456789
430df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    String golden =
440df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      "The quick\u00a0brown fox\njumps over\r\nthe lazy dog\n";
450df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(golden, Encoding.decodeHtml(html));
460df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
470df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    // Don't allocate a new string when no entities.
480df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertSame(golden, Encoding.decodeHtml(golden));
490df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
500df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    // test interrupted escapes and escapes at end of file handled gracefully
510df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
520df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\\\\u000a",
530df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("\\\\u000a"));
540df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
550df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
560df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("
"));
570df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
580df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
590df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("
"));
600df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
610df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
620df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("
"));
630df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
640df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
650df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("
"));
660df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
670df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        String.valueOf(Character.toChars(0x10000)),
680df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("𐀀"));
690df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
700df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
710df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#xa"));
720df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
730df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#x00ziggy",
740df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#x00ziggy"));
750df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
760df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#xa00z;",
770df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#xa00z;"));
780df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
790df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#\n",
800df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#
"));
810df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
820df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#x\n",
830df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#x
"));
840df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
850df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n\n",
860df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#xa
"));
870df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
880df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#\n",
890df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#
"));
900df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
910df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#x",
920df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#x"));
930df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
940df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "",  // NUL elided.
950df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#x0"));
960df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
970df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#",
980df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#"));
990df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
1000df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1010df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\\",
1020df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("\\"));
1030df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1040df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&",
1050df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&"));
1060df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
1070df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1080df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#000a;",
1090df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#000a;"));
1100df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1110df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
1120df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("
"));
1130df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1140df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
1150df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("
"));
1160df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1170df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
1180df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("
"));
1190df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1200df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\t",
1210df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("	"));
1220df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1230df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
1240df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#10"));
1250df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1260df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#00ziggy",
1270df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#00ziggy"));
1280df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1290df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#\n",
1300df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#
"));
1310df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1320df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
1330df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#0
"));
1340df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1350df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
1360df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#01
"));
1370df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1380df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&#\n",
1390df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#
"));
1400df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1410df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "",  // Invalid XML char elided.
1420df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#1"));
1430df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1440df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\t",
1450df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#9"));
1460df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1470df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\n",
1480df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#10"));
1490df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
1500df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    // test the named escapes
1510df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1520df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "<",
1530df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&lt;"));
1540df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1550df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        ">",
1560df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&gt;"));
1570df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1580df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\"",
1590df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&quot;"));
1600df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1610df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "'",
1620df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&apos;"));
1630df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1640df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "'",
1650df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#39;"));
1660df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1670df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "'",
1680df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&#x27;"));
1690df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1700df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&",
1710df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&amp;"));
1720df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1730df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&lt;",
1740df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&amp;lt;"));
1750df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1760df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&",
1770df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&AMP;"));
1780df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1790df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&",
1800df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&AMP"));
1810df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1820df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&",
1830df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&AmP;"));
1840df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1850df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\u0391",
1860df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&Alpha;"));
1870df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1880df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "\u03b1",
1890df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&alpha;"));
1900df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
1910df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1920df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&;",
1930df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&;"));
1940df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
1950df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        "&bogus;",
1960df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.decodeHtml("&bogus;"));
1970df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel  }
1980df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
199be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  @Test
200ff252bf1803947ae2266e8ddc58ef383225be32amikesamuel  public static final void testAppendNumericEntityAndEncodeOnto()
201ff252bf1803947ae2266e8ddc58ef383225be32amikesamuel      throws Exception {
2020df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    StringBuilder sb = new StringBuilder();
2030df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    StringBuilder cps = new StringBuilder();
2040df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    for (int codepoint : new int[] {
2050df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        0, 9, '\n', '@', 0x80, 0xff, 0x100, 0xfff, 0x1000, 0x123a, 0xffff,
2060df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        0x10000, Character.MAX_CODE_POINT }) {
2070df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      Encoding.appendNumericEntity(codepoint, sb);
2080df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      sb.append(' ');
2090df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
2100df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      cps.appendCodePoint(codepoint).append(' ');
2110df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    }
2120df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
2130df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
2140df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel         "&#0; &#9; &#10; &#64; &#x80; &#xff; &#x100; &#xfff; &#x1000; "
2150df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel         + "&#x123a; &#xffff; &#x10000; &#x10ffff; ",
2160df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel         sb.toString());
2170df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
2180df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    StringBuilder out = new StringBuilder();
2190df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    Encoding.encodeHtmlOnto(cps.toString(), out);
2200df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(
2210df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        " \t \n &#64; \u0080 \u00ff \u0100 \u0fff \u1000 "
2220df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        + "\u123a  &#x10000; &#x10ffff; ",
2230df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        out.toString());
2240df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel  }
2250df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
2260df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel  private static final void assertStripped(String stripped, String orig) {
2270df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    String actual = Encoding.stripBannedCodeunits(orig);
2280df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(orig, stripped, actual);
2290df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    if (stripped.equals(orig)) {
2300df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      assertSame(actual, orig);
2310df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    }
2320df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
2330df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    StringBuilder sb = new StringBuilder(orig);
2340df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    Encoding.stripBannedCodeunits(sb);
2350df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertEquals(orig, stripped, sb.toString());
2360df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel  }
2370df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
238be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  @Test
239be666032a113a8af92bc557add8e83579cf0ef5cmikesamuel  public static final void testStripBannedCodeunits() {
2400df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("", "");
2410df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foo", "foo");
2420df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foobar", "foo\u0000bar");
2430df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foobar", "foo\u0000bar\u0000");
2440df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foobar", "foo\ufffebar\u0008");
2450df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foobar", "foo\ud800bar\udc00");
2460df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foo\ud800\udc00bar", "foo\ud800\ud800\udc00bar");
2470df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\ud800bar");
2480df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foo\ud800\udc00bar", "foo\ud800\udc00\udc00bar");
2490df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foo\ud800\udc00bar", "foo\udc00\ud800\udc00bar");
2500df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foo\ud834\udd1ebar", "foo\ud834\udd1ebar");
2510df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("foo\ud834\udd1e", "foo\ud834\udd1e");
2520df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    assertStripped("\uffef\ufffd", "\uffef\ufffd\ufffe\uffff");
2530df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel  }
2540df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel}
255