HtmlStreamRenderer.java revision 4e867904c8295537803c1c8a076e130df5674b58
15c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.compackage org.owasp.html; 25c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 34e867904c8295537803c1c8a076e130df5674b58mikesamuelimport com.google.common.annotations.VisibleForTesting; 45c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.Closeable; 55c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.Flushable; 65c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.IOException; 75c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.util.Iterator; 85c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.util.List; 94e867904c8295537803c1c8a076e130df5674b58mikesamuelimport javax.annotation.WillCloseWhenClosed; 105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com/** 125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * Given a series of HTML tokens, writes valid, normalized HTML to the output. 135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The output will have well-defined tag boundaries, but there may be orphaned 145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * or missing close and open tags. 155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The result of two renderers can always be concatenated to produce a larger 165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * snippet of HTML, but if the first was called with 175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * {@code writeOpenTag("plaintext", ...)}, then any tags in the second will not 185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * be interpreted as tags in the concatenated version. 195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */ 205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com@TCB 215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.compublic class HtmlStreamRenderer implements HtmlStreamEventReceiver { 225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Appendable output; 245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Handler<? super IOException> ioExHandler; 255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Handler<? super String> badHtmlHandler; 265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private String lastTagOpened; 275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private StringBuilder pendingUnescaped; 285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private boolean open; 295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com /** 315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param output the buffer to which HTML is streamed. 325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param ioExHandler called with any exception raised by output. 335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */ 345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public static HtmlStreamRenderer create( 354e867904c8295537803c1c8a076e130df5674b58mikesamuel @WillCloseWhenClosed Appendable output, 364e867904c8295537803c1c8a076e130df5674b58mikesamuel Handler<? super IOException> ioExHandler, 375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Handler<? super String> badHtmlHandler) { 385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (output instanceof Closeable) { 395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return new CloseableHtmlStreamRenderer( 405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output, ioExHandler, badHtmlHandler); 415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return new HtmlStreamRenderer(output, ioExHandler, badHtmlHandler); 435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private HtmlStreamRenderer( 475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Appendable output, Handler<? super IOException> ioExHandler, 485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Handler<? super String> badHtmlHandler) { 495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.output = output; 505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.ioExHandler = ioExHandler; 515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.badHtmlHandler = badHtmlHandler; 525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public static HtmlStreamRenderer create( 555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com StringBuilder output, Handler<? super String> badHtmlHandler) { 565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // Propagate since StringBuilder should not throw IOExceptions. 575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return create(output, Handler.PROPAGATE, badHtmlHandler); 585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com /** 615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * Called when the series of calls make no sense. 625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * May be overridden to throw an unchecked throwable, to log, or to take some 635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * other action. 645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * 655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param message for human consumption. 665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * @param identifier an HTML identifier associated with the message. 675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */ 685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final void error(String message, String identifier) { 695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (ioExHandler != Handler.DO_NOTHING) { // Avoid string append. 705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com badHtmlHandler.handle(message + " : " + identifier); 715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com /** 755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * 765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */ 775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void openDocument() throws IllegalStateException { 785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (open) { throw new IllegalStateException(); } 795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com open = true; 805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void closeDocument() throws IllegalStateException { 835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 845c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 855c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com closeTag(lastTagOpened); 865c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 875c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com open = false; 885c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (output instanceof Flushable) { 895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ((Flushable) output).flush(); 915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final boolean isDocumentOpen() { 985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return open; 995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void openTag(String elementName, List<String> attrs) { 1025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 1035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com writeOpenTag(elementName, attrs); 1045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 1055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 1065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private void writeOpenTag(String elementName, List<? extends String> attrs) 1105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com throws IOException { 1115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 1125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com elementName = HtmlLexer.canonicalName(elementName); 1135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!isValidHtmlName(elementName)) { 1145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Invalid element name", elementName); 1155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 1165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 1185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Tag content cannot appear inside CDATA element", elementName); 1195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 1205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com switch (HtmlTextEscapingMode.getModeForTag(elementName)) { 1235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case CDATA: 1245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case CDATA_SOMETIMES: 1255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case PLAIN_TEXT: 1265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com lastTagOpened = elementName; 1275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pendingUnescaped = new StringBuilder(); 1285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 1295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com default: 1305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append('<').append(elementName); 1335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com for (Iterator<? extends String> attrIt = attrs.iterator(); 1355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com attrIt.hasNext();) { 1365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com String name = attrIt.next(); 1375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com String value = attrIt.next(); 1385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com name = HtmlLexer.canonicalName(name); 1395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!isValidHtmlName(name)) { 1405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Invalid attr name", name); 1415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com continue; 1425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(' ').append(name).append('=').append('"'); 1445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com escapeHtmlOnto(value, output); 1455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append('"'); 1465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append('>'); 1495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void closeTag(String elementName) { 1525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 1535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com writeCloseTag(elementName); 1545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 1555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 1565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final void writeCloseTag(String elementName) 1605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com throws IOException { 1615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 1625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com elementName = HtmlLexer.canonicalName(elementName); 1635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!isValidHtmlName(elementName)) { 1645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Invalid element name", elementName); 1655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 1665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 1695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!lastTagOpened.equals(elementName)) { 1705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Tag content cannot appear inside CDATA element", elementName); 1715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return; 1725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 1735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com String unescaped = pendingUnescaped.toString(); 1745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pendingUnescaped = null; 1755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!containsCloseTag(unescaped, lastTagOpened)) { 1765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(unescaped); 1775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 1785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com error("Unescaped text content contains close tag", elementName); 1795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // Still output the close tag. 1805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if ("plaintext".equals(elementName)) { return; } 1835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1845c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append("</").append(elementName).append(">"); 1855c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1865c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1875c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public final void text(String text) { 1885c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com try { 1895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com writeText(text); 1905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } catch (IOException ex) { 1915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com ioExHandler.handle(ex); 1925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 1945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 1955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final void writeText(String text) throws IOException { 1965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (!open) { throw new IllegalStateException(); } 1975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (pendingUnescaped != null) { 1985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pendingUnescaped.append(text.replaceAll("\0", "")); 1995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 2005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com escapeHtmlOnto(text, output); // Works for RCDATA. 2015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private static boolean containsCloseTag(String unescaped, String tagName) { 2055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com boolean allowEscapingTextSpan = HtmlTextEscapingMode.allowsEscapingTextSpan( 2065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com tagName); 2075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com int unescapedLength = unescaped.length(); 2095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com int tagNameLength = tagName.length(); 2105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com int limit = unescapedLength - tagName.length() - 2; 2115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com for (int i = -1; (i = unescaped.indexOf('<', i + 1)) >= 0;) { 2125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (i <= limit && '/' == unescaped.charAt(i + 1) 2135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com && Strings.regionMatchesIgnoreCase( 2145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com unescaped, i + 2, tagName, 0, tagNameLength)) { 2155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // Content cannot be embedded. 2165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return true; 2175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else if (allowEscapingTextSpan && i + 4 <= unescapedLength 2185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com && '!' == unescaped.charAt(i + 1) 2195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com && '-' == unescaped.charAt(i + 2) 2205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com && '-' == unescaped.charAt(i + 3)) { 2215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // HTML 5 allows the end of an escaping text span to share dashes with 2225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // the open : <!--> and <!---> are both fully formed. 2235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (i + 4 < unescapedLength && unescaped.charAt(i + 4) == '>') { 2245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com i = i + 5; 2255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else if (i + 5 < unescapedLength 2265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com && unescaped.charAt(i + 4) == '-' 2275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com && unescaped.charAt(i + 5) == '>') { 2285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com i = i + 6; 2295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 2305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com i = unescaped.indexOf("-->", i + 4); 2315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (i < 0) { 2325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // If the escaping text span is not closed, then final close tag 2335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // would be covered by the unclosed escaping text span. 2345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return true; 2355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return false; 2405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com @VisibleForTesting 2445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com static boolean isValidHtmlName(String name) { 2455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com int n = name.length(); 2465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (n == 0) { return false; } 2475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (n > 128) { return false; } 2485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com boolean isNamespaced = false; 2495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com for (int i = 0; i < n; ++i) { 2505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com char ch = name.charAt(i); 2515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com switch (ch) { 2525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case ':': 2535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (isNamespaced) { return false; } 2545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com isNamespaced = true; 2554e867904c8295537803c1c8a076e130df5674b58mikesamuel if (i == 0 || i + 1 == n) { return false; } 2564e867904c8295537803c1c8a076e130df5674b58mikesamuel break; 2575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '-': 2585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (i == 0 || i + 1 == n) { return false; } 2595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 2605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com default: 2615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (ch <= '9') { 2625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (i == 0 || ch < '0') { return false; } 2635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else if ('A' <= ch && ch <= 'z') { 2645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if ('Z' < ch && ch < 'a') { return false; } 2655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } else { 2665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return false; 2675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 2695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com return true; 2725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 2735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 2743a3d912deec6a8382422b602031d12fee7d4c73amikesamuel @SuppressWarnings("fallthrough") 2755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com static void escapeHtmlOnto(String plainText, Appendable output) 2765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com throws IOException { 2775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com int n = plainText.length(); 2785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com int pos = 0; 2795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com for (int i = 0; i < n; ++i) { 2805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com char ch = plainText.charAt(i); 2815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com switch (ch) { 2825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '<': 2835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(plainText, pos, i).append("<"); 2845c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pos = i + 1; 2855c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 2865c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '>': 2875c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(plainText, pos, i).append(">"); 2885c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pos = i + 1; 2895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 2905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '&': 2915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(plainText, pos, i).append("&"); 2925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pos = i + 1; 2935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 2945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '"': 2955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(plainText, pos, i).append("""); 2965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pos = i + 1; 2975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 2984e867904c8295537803c1c8a076e130df5674b58mikesamuel case '\r': case '\n': break; 2995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com default: 3005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (0x20 <= ch && ch < 0xff00) { 3015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com continue; 3025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // Is a control character or possible full-width version of a 3045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com // special character. 3050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel // FALL-THROUGH 3065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '+': // UTF-7 3075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '=': // Special in attributes. 3085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '@': // Conditional compilation 3095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case '\'': case '`': // Quoting character 3105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(plainText, pos, i).append("&#") 3115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com .append(String.valueOf((int) ch)).append(';'); 3125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pos = i + 1; 3135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 3145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com case 0: 3155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(plainText, pos, i); 3165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com pos = i + 1; 3175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com break; 3185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com output.append(plainText, pos, n); 3215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com static class CloseableHtmlStreamRenderer extends HtmlStreamRenderer 3255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com implements Closeable { 3265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com private final Closeable closeable; 3275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com CloseableHtmlStreamRenderer( 3294e867904c8295537803c1c8a076e130df5674b58mikesamuel @WillCloseWhenClosed 3305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Appendable output, Handler<? super IOException> errorHandler, 3315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com Handler<? super String> badHtmlHandler) { 3325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com super(output, errorHandler, badHtmlHandler); 3335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com this.closeable = (Closeable) output; 3345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com 3365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com public void close() throws IOException { 3375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com if (isDocumentOpen()) { closeDocument(); } 3385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com closeable.close(); 3395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com } 3415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com} 342