18403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Copyright (c) 2011, Mike Samuel 28403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// All rights reserved. 38403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// 48403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Redistribution and use in source and binary forms, with or without 58403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// modification, are permitted provided that the following conditions 68403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// are met: 78403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// 88403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Redistributions of source code must retain the above copyright 98403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// notice, this list of conditions and the following disclaimer. 108403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Redistributions in binary form must reproduce the above copyright 118403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// notice, this list of conditions and the following disclaimer in the 128403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// documentation and/or other materials provided with the distribution. 138403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Neither the name of the OWASP nor the names of its contributors may 148403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// be used to endorse or promote products derived from this software 158403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// without specific prior written permission. 168403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 178403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 188403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 198403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 208403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 218403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 228403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 238403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 248403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 258403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 268403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 278403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// POSSIBILITY OF SUCH DAMAGE. 288403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel 295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.compackage org.owasp.html; 305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 314e867904c8295537803c1c8a076e130df5674b58mikesamuelimport com.google.common.annotations.VisibleForTesting; 325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.Closeable; 335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.Flushable; 345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.IOException; 355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.util.Iterator; 365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.util.List; 374e867904c8295537803c1c8a076e130df5674b58mikesamuelimport javax.annotation.WillCloseWhenClosed; 381bfae835221847e7791625e2baa98a60eb3cfa8amikesamuelimport javax.annotation.concurrent.NotThreadSafe; 395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com/** 415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * Given a series of HTML tokens, writes valid, normalized HTML to the output. 425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The output will have well-defined tag boundaries, but there may be orphaned 435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * or missing close and open tags. 445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The result of two renderers can always be concatenated to produce a larger 455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * snippet of HTML, but if the first was called with 465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * {@code writeOpenTag("plaintext", ...)}, then any tags in the second will not 475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * be interpreted as tags in the concatenated version. 485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */ 495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com@TCB 501bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel@NotThreadSafe 515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.compublic class HtmlStreamRenderer implements HtmlStreamEventReceiver { 525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Appendable output; 545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Handler<? super IOException> ioExHandler; 555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Handler<? super String> badHtmlHandler; 565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private String lastTagOpened; 575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private StringBuilder pendingUnescaped; 585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private boolean open; 595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com /** 61f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * Factory. 625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param output the buffer to which HTML is streamed. 635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param ioExHandler called with any exception raised by output. 64f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * @param badHtmlHandler receives alerts when HTML cannot be rendered because 65f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * there is not valid HTML tree that results from that series of calls. 66f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * E.g. it is not possible to create an HTML {@code <style>} element whose 67f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * textual content is {@code "</style>"}. 685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */ 695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public static HtmlStreamRenderer create( 704e867904c8295537803c1c8a076e130df5674b58mikesamuel @WillCloseWhenClosed Appendable output, 714e867904c8295537803c1c8a076e130df5674b58mikesamuel Handler<? super IOException> ioExHandler, 725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Handler<? super String> badHtmlHandler) { 735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (output instanceof Closeable) { 745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return new CloseableHtmlStreamRenderer( 755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output, ioExHandler, badHtmlHandler); 765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return new HtmlStreamRenderer(output, ioExHandler, badHtmlHandler); 785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 81f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel /** 82f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * Factory. 83f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * @param output the buffer to which HTML is streamed. 84f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * @param badHtmlHandler receives alerts when HTML cannot be rendered because 85f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * there is not valid HTML tree that results from that series of calls. 86f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * E.g. it is not possible to create an HTML {@code <style>} element whose 87f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel * textual content is {@code "</style>"}. 88f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel */ 89f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel public static HtmlStreamRenderer create( 90f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel StringBuilder output, Handler<? super String> badHtmlHandler) { 91f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel // Propagate since StringBuilder should not throw IOExceptions. 92f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel return create(output, Handler.PROPAGATE, badHtmlHandler); 93f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel } 94f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel 955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private HtmlStreamRenderer( 965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Appendable output, Handler<? super IOException> ioExHandler, 975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Handler<? super String> badHtmlHandler) { 985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.output = output; 995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.ioExHandler = ioExHandler; 1005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.badHtmlHandler = badHtmlHandler; 1015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com /** 1045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * Called when the series of calls make no sense. 1055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * May be overridden to throw an unchecked throwable, to log, or to take some 1065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * other action. 1075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * 1085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param message for human consumption. 1095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param identifier an HTML identifier associated with the message. 1105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */ 111f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel private final void error(String message, CharSequence identifier) { 112f27efcbb0ed6810cb608024c6430338fe5f32bb7mikesamuel if (badHtmlHandler != Handler.DO_NOTHING) { // Avoid string append. 1135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com badHtmlHandler.handle(message + " : " + identifier); 1145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void openDocument() throws IllegalStateException { 1185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (open) { throw new IllegalStateException(); } 1195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com open = true; 1205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void closeDocument() throws IllegalStateException { 1235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 1245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 1255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com closeTag(lastTagOpened); 1265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com open = false; 1285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (output instanceof Flushable) { 1295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 1305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ((Flushable) output).flush(); 1315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 1325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 1335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final boolean isDocumentOpen() { 1385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return open; 1395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void openTag(String elementName, List<String> attrs) { 1425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 1435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com writeOpenTag(elementName, attrs); 1445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 1455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 1465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private void writeOpenTag(String elementName, List<? extends String> attrs) 1505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com throws IOException { 1515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 152b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel elementName = safeName(elementName); 1535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!isValidHtmlName(elementName)) { 1545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Invalid element name", elementName); 1555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 1565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 1585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Tag content cannot appear inside CDATA element", elementName); 1595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 1605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com switch (HtmlTextEscapingMode.getModeForTag(elementName)) { 1630df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel case CDATA_SOMETIMES: 1645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case CDATA: 1655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case PLAIN_TEXT: 1665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com lastTagOpened = elementName; 1675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pendingUnescaped = new StringBuilder(); 1685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 1695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com default: 1705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append('<').append(elementName); 1735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com for (Iterator<? extends String> attrIt = attrs.iterator(); 1755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com attrIt.hasNext();) { 1765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com String name = attrIt.next(); 1775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com String value = attrIt.next(); 1785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com name = HtmlLexer.canonicalName(name); 1795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!isValidHtmlName(name)) { 1805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Invalid attr name", name); 1815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com continue; 1825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(' ').append(name).append('=').append('"'); 1840df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.encodeHtmlOnto(value, output); 1851bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel if (value.indexOf('`') != -1) { 1861bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // Apparently, in quirks mode, IE8 does a poor job producing innerHTML 1871bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // values. Given 1881bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // <div attr="``foo=bar"> 1891bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // we encode ` but if JavaScript does: 1901bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // nodeA.innerHTML = nodeB.innerHTML; 1911bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // and nodeB contains the DIV above, then IE8 will produce 1921bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // <div attr=``foo=bar> 1931bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // as the value of nodeB.innerHTML and assign it to nodeA. 1941bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // IE8's HTML parser treats `` as a blank attribute value and foo=bar 1951bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // becomes a separate attribute. 1961bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // Adding a space at the end of the attribute prevents this by forcing 1971bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // IE8 to put double quotes around the attribute when computing 1981bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel // nodeB.innerHTML. 1991bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel output.append(' '); 2001bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel } 2015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append('"'); 2025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2040df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // Limit our output to the intersection of valid XML and valid HTML5 when 2050df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // the output contains no special HTML5 elements like <title>, <script>, or 2060df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel // <textarea>. 2070df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel if (HtmlTextEscapingMode.isVoidElement(elementName)) { 2080df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel output.append(" /"); 2090df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel } 2100df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel 2115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append('>'); 2125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void closeTag(String elementName) { 2155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 216b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel writeCloseTag(safeName(elementName)); 2175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 2185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 2195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final void writeCloseTag(String elementName) 2235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com throws IOException { 2245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 2255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com elementName = HtmlLexer.canonicalName(elementName); 2265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!isValidHtmlName(elementName)) { 2275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Invalid element name", elementName); 2285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 2295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 2325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!lastTagOpened.equals(elementName)) { 2335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Tag content cannot appear inside CDATA element", elementName); 2345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 2355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 2368560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel StringBuilder cdataContent = pendingUnescaped; 2375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pendingUnescaped = null; 2380df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.stripBannedCodeunits(cdataContent); 2398560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel int problemIndex = checkHtmlCdataCloseable(lastTagOpened, cdataContent); 2408560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (problemIndex == -1) { 2418560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel output.append(cdataContent); 2425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 2438560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel error( 2448560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel "Invalid CDATA text content", 2458560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel cdataContent.subSequence( 2468560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel problemIndex, 247f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel Math.min(problemIndex + 10, cdataContent.length()))); 2485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // Still output the close tag. 2495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if ("plaintext".equals(elementName)) { return; } 2525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append("</").append(elementName).append(">"); 2545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void text(String text) { 2575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 2585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com writeText(text); 2595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 2605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 2615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final void writeText(String text) throws IOException { 2655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 2665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 2670df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel pendingUnescaped.append(text); 2685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 2690df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel Encoding.encodeHtmlOnto(text, output); // Works for RCDATA. 2705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2738560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel private static int checkHtmlCdataCloseable( 2748560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel String localName, StringBuilder sb) { 2758560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel int escapingTextSpanStart = -1; 2768560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel for (int i = 0, n = sb.length(); i < n; ++i) { 2778560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel char ch = sb.charAt(i); 2788560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel switch (ch) { 2798560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel case '<': 2808560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (i + 3 < n 2818560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel && '!' == sb.charAt(i + 1) 2828560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel && '-' == sb.charAt(i + 2) 2838560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel && '-' == sb.charAt(i + 3)) { 2848560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (escapingTextSpanStart == -1) { 2858560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel escapingTextSpanStart = i; 2868560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel } else { 2878560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel return i; 2888560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel } 2898560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel } else if (i + 1 + localName.length() < n 2908560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel && '/' == sb.charAt(i + 1) 2918560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel && Strings.regionMatchesIgnoreCase( 2928560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel sb, i + 2, localName, 0, localName.length())) { 2938560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // A close tag contained in the content. 2948560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (escapingTextSpanStart < 0) { 2958560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // We could try some recovery strategies here. 2968560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // E.g. prepending "/<!--\n" to sb if "script".equals(localName) 2978560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel return i; 2988560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel } 2998560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (!"script".equals(localName)) { 3008560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // Script tags are commonly included inside script tags. 3018560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // <script><!--document.write('<script>f()</script>');--></script> 3028560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // but this does not happen in other CDATA element types. 3038560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // Actually allowing an end tag inside others is problematic. 3048560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // Specifically, 3058560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // <style><!--</style>-->/* foo */</style> 3068560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // displays the text "/* foo */" on some browsers. 3078560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel return i; 3088560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel } 3095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3108560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel break; 3118560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel case '>': 3128560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // From the HTML5 spec: 3138560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // The text in style, script, title, and textarea elements must not 3148560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // have an escaping text span start that is not followed by an 3158560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // escaping text span end. 3168560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // We look left since the HTML 5 spec allows the escaping text span 3178560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // end to share dashes with the start. 3188560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (i >= 2 && '-' == sb.charAt(i - 1) && '-' == sb.charAt(i - 2)) { 3198560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (escapingTextSpanStart < 0) { return i - 2; } 3208560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel escapingTextSpanStart = -1; 3218560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel } 3228560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel break; 323489a0ec7301a86af8497d24748336db09ca278damikesamuel default: 324489a0ec7301a86af8497d24748336db09ca278damikesamuel break; 3255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3278560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel if (escapingTextSpanStart >= 0) { 3288560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // We could try recovery strategies here. 3298560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel // E.g. appending "//-->" to the buffer if "script".equals(localName) 3308560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel return escapingTextSpanStart; 3318560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel } 3328560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel return -1; 3335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com @VisibleForTesting 3375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com static boolean isValidHtmlName(String name) { 3385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com int n = name.length(); 3395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (n == 0) { return false; } 3405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (n > 128) { return false; } 3415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com boolean isNamespaced = false; 3425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com for (int i = 0; i < n; ++i) { 3435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com char ch = name.charAt(i); 3445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com switch (ch) { 3455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case ':': 3465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (isNamespaced) { return false; } 3475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com isNamespaced = true; 3484e867904c8295537803c1c8a076e130df5674b58mikesamuel if (i == 0 || i + 1 == n) { return false; } 3494e867904c8295537803c1c8a076e130df5674b58mikesamuel break; 3505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '-': 3515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (i == 0 || i + 1 == n) { return false; } 3525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 3535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com default: 3545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (ch <= '9') { 3555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (i == 0 || ch < '0') { return false; } 3565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else if ('A' <= ch && ch <= 'z') { 3575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if ('Z' < ch && ch < 'a') { return false; } 3585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 3595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return false; 3605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 3625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return true; 3655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3671bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel /** 368b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel * Canonicalizes the element name and possibly substitutes an alternative 369b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel * that has more consistent semantics. 3700df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel */ 371b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel static String safeName(String elementName) { 372b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel elementName = HtmlLexer.canonicalName(elementName); 373b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel 374b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel // Substitute a reliably non-raw-text element for raw-text and 375b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel // plain-text elements. 376b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel switch (elementName.length()) { 377b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel case 3: 378b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel if ("xmp".equals(elementName)) { return "pre"; } 379b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel break; 380b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel case 7: 381b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel if ("listing".equals(elementName)) { return "pre"; } 382b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel break; 383b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel case 9: 384b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel if ("plaintext".equals(elementName)) { return "pre"; } 385b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel break; 386b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel } 387b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel return elementName; 388b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel } 3895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com static class CloseableHtmlStreamRenderer extends HtmlStreamRenderer 3915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com implements Closeable { 3925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Closeable closeable; 3935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com CloseableHtmlStreamRenderer( 3954e867904c8295537803c1c8a076e130df5674b58mikesamuel @WillCloseWhenClosed 3965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Appendable output, Handler<? super IOException> errorHandler, 3975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Handler<? super String> badHtmlHandler) { 3985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com super(output, errorHandler, badHtmlHandler); 3995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.closeable = (Closeable) output; 4005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 4015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 4025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public void close() throws IOException { 4035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (isDocumentOpen()) { closeDocument(); } 4045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com closeable.close(); 4055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 4065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 4075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com} 408