18403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Copyright (c) 2011, Mike Samuel
28403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// All rights reserved.
38403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel//
48403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Redistribution and use in source and binary forms, with or without
58403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// modification, are permitted provided that the following conditions
68403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// are met:
78403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel//
88403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Redistributions of source code must retain the above copyright
98403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// notice, this list of conditions and the following disclaimer.
108403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Redistributions in binary form must reproduce the above copyright
118403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// notice, this list of conditions and the following disclaimer in the
128403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// documentation and/or other materials provided with the distribution.
138403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// Neither the name of the OWASP nor the names of its contributors may
148403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// be used to endorse or promote products derived from this software
158403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// without specific prior written permission.
168403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
178403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
188403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
198403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
208403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
218403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
228403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
238403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
248403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
258403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
268403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
278403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel// POSSIBILITY OF SUCH DAMAGE.
288403881c365ab36b721ccc4500af1b3a5bd25870mikesamuel
295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.compackage org.owasp.html;
305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
314e867904c8295537803c1c8a076e130df5674b58mikesamuelimport com.google.common.annotations.VisibleForTesting;
325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.Closeable;
335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.Flushable;
345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.io.IOException;
355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.util.Iterator;
365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.comimport java.util.List;
374e867904c8295537803c1c8a076e130df5674b58mikesamuelimport javax.annotation.WillCloseWhenClosed;
381bfae835221847e7791625e2baa98a60eb3cfa8amikesamuelimport javax.annotation.concurrent.NotThreadSafe;
395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com/**
415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * Given a series of HTML tokens, writes valid, normalized HTML to the output.
425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The output will have well-defined tag boundaries, but there may be orphaned
435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * or missing close and open tags.
445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The result of two renderers can always be concatenated to produce a larger
455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * snippet of HTML, but if the first was called with
 * {@code openTag("plaintext", ...)}, then any tags in the second will not
475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * be interpreted as tags in the concatenated version.
485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */
495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com@TCB
501bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel@NotThreadSafe
515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.compublic class HtmlStreamRenderer implements HtmlStreamEventReceiver {
525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
  /** Destination to which rendered HTML is appended. */
  private final Appendable output;
  /** Receives any IOException raised while appending to or flushing output. */
  private final Handler<? super IOException> ioExHandler;
  /** Receives a message whenever a call cannot be rendered as valid HTML. */
  private final Handler<? super String> badHtmlHandler;
  /**
   * Name of the most recently opened element whose text content is buffered
   * unescaped (escaping mode CDATA, CDATA_SOMETIMES or PLAIN_TEXT).
   */
  private String lastTagOpened;
  /**
   * Buffers the text of the currently open unescaped-content element until
   * its close tag is written; null when no such element is open.
   */
  private StringBuilder pendingUnescaped;
  /** True between a call to openDocument() and the matching closeDocument(). */
  private boolean open;
595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  /**
61f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   * Factory.
625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param output the buffer to which HTML is streamed.
635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param ioExHandler called with any exception raised by output.
64f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   * @param badHtmlHandler receives alerts when HTML cannot be rendered because
65f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   *    there is not valid HTML tree that results from that series of calls.
66f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   *    E.g. it is not possible to create an HTML {@code <style>} element whose
67f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   *    textual content is {@code "</style>"}.
685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   */
695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public static HtmlStreamRenderer create(
704e867904c8295537803c1c8a076e130df5674b58mikesamuel      @WillCloseWhenClosed Appendable output,
714e867904c8295537803c1c8a076e130df5674b58mikesamuel      Handler<? super IOException> ioExHandler,
725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      Handler<? super String> badHtmlHandler) {
735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (output instanceof Closeable) {
745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return new CloseableHtmlStreamRenderer(
755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output, ioExHandler, badHtmlHandler);
765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } else {
775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return new HtmlStreamRenderer(output, ioExHandler, badHtmlHandler);
785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
81f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  /**
82f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   * Factory.
83f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   * @param output the buffer to which HTML is streamed.
84f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   * @param badHtmlHandler receives alerts when HTML cannot be rendered because
85f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   *    there is not valid HTML tree that results from that series of calls.
86f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   *    E.g. it is not possible to create an HTML {@code <style>} element whose
87f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   *    textual content is {@code "</style>"}.
88f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel   */
89f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  public static HtmlStreamRenderer create(
90f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel      StringBuilder output, Handler<? super String> badHtmlHandler) {
91f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    // Propagate since StringBuilder should not throw IOExceptions.
92f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel    return create(output, Handler.PROPAGATE, badHtmlHandler);
93f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  }
94f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel
955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private HtmlStreamRenderer(
965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      Appendable output, Handler<? super IOException> ioExHandler,
975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      Handler<? super String> badHtmlHandler) {
985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    this.output = output;
995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    this.ioExHandler = ioExHandler;
1005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    this.badHtmlHandler = badHtmlHandler;
1015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  /**
1045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * Called when the series of calls make no sense.
1055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * May be overridden to throw an unchecked throwable, to log, or to take some
1065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * other action.
1075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   *
1085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param message for human consumption.
1095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param identifier an HTML identifier associated with the message.
1105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   */
111f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel  private final void error(String message, CharSequence identifier) {
112f27efcbb0ed6810cb608024c6430338fe5f32bb7mikesamuel    if (badHtmlHandler != Handler.DO_NOTHING) {   // Avoid string append.
1135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      badHtmlHandler.handle(message + " : " + identifier);
1145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void openDocument() throws IllegalStateException {
1185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (open) { throw new IllegalStateException(); }
1195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    open = true;
1205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void closeDocument() throws IllegalStateException {
1235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
1245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
1255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      closeTag(lastTagOpened);
1265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    open = false;
1285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (output instanceof Flushable) {
1295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      try {
1305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        ((Flushable) output).flush();
1315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      } catch (IOException ex) {
1325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        ioExHandler.handle(ex);
1335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
1345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final boolean isDocumentOpen() {
1385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    return open;
1395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void openTag(String elementName, List<String> attrs) {
1425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    try {
1435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      writeOpenTag(elementName, attrs);
1445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } catch (IOException ex) {
1455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      ioExHandler.handle(ex);
1465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private void writeOpenTag(String elementName, List<? extends String> attrs)
1505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      throws IOException {
1515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
152b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel    elementName = safeName(elementName);
1535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!isValidHtmlName(elementName)) {
1545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      error("Invalid element name", elementName);
1555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return;
1565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
1585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      error("Tag content cannot appear inside CDATA element", elementName);
1595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return;
1605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    switch (HtmlTextEscapingMode.getModeForTag(elementName)) {
1630df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      case CDATA_SOMETIMES:
1645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      case CDATA:
1655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      case PLAIN_TEXT:
1665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        lastTagOpened = elementName;
1675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        pendingUnescaped = new StringBuilder();
1685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        break;
1695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      default:
1705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    output.append('<').append(elementName);
1735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    for (Iterator<? extends String> attrIt = attrs.iterator();
1755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com         attrIt.hasNext();) {
1765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      String name = attrIt.next();
1775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      String value = attrIt.next();
1785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      name = HtmlLexer.canonicalName(name);
1795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if (!isValidHtmlName(name)) {
1805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        error("Invalid attr name", name);
1815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        continue;
1825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
1835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      output.append(' ').append(name).append('=').append('"');
1840df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      Encoding.encodeHtmlOnto(value, output);
1851bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel      if (value.indexOf('`') != -1) {
1861bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // Apparently, in quirks mode, IE8 does a poor job producing innerHTML
1871bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // values.  Given
1881bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        //     <div attr="``foo=bar">
1891bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // we encode &#96; but if JavaScript does:
1901bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        //    nodeA.innerHTML = nodeB.innerHTML;
1911bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // and nodeB contains the DIV above, then IE8 will produce
1921bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        //     <div attr=``foo=bar>
1931bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // as the value of nodeB.innerHTML and assign it to nodeA.
1941bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // IE8's HTML parser treats `` as a blank attribute value and foo=bar
1951bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // becomes a separate attribute.
1961bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // Adding a space at the end of the attribute prevents this by forcing
1971bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // IE8 to put double quotes around the attribute when computing
1981bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        // nodeB.innerHTML.
1991bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel        output.append(' ');
2001bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel      }
2015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      output.append('"');
2025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2040df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    // Limit our output to the intersection of valid XML and valid HTML5 when
2050df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    // the output contains no special HTML5 elements like <title>, <script>, or
2060df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    // <textarea>.
2070df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    if (HtmlTextEscapingMode.isVoidElement(elementName)) {
2080df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      output.append(" /");
2090df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel    }
2100df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel
2115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    output.append('>');
2125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void closeTag(String elementName) {
2155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    try {
216b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel      writeCloseTag(safeName(elementName));
2175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } catch (IOException ex) {
2185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      ioExHandler.handle(ex);
2195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final void writeCloseTag(String elementName)
2235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      throws IOException {
2245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
2255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    elementName = HtmlLexer.canonicalName(elementName);
2265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!isValidHtmlName(elementName)) {
2275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      error("Invalid element name", elementName);
2285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return;
2295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
2325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if (!lastTagOpened.equals(elementName)) {
2335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        error("Tag content cannot appear inside CDATA element", elementName);
2345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        return;
2355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      } else {
2368560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel        StringBuilder cdataContent = pendingUnescaped;
2375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        pendingUnescaped = null;
2380df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel        Encoding.stripBannedCodeunits(cdataContent);
2398560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel        int problemIndex = checkHtmlCdataCloseable(lastTagOpened, cdataContent);
2408560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel        if (problemIndex == -1) {
2418560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          output.append(cdataContent);
2425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        } else {
2438560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          error(
2448560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              "Invalid CDATA text content",
2458560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              cdataContent.subSequence(
2468560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel                  problemIndex,
247f06f9a5ed2a3dfd88320a8ad14ae1c032c6a80cfmikesamuel                  Math.min(problemIndex + 10, cdataContent.length())));
2485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          // Still output the close tag.
2495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        }
2505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
2515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if ("plaintext".equals(elementName)) { return; }
2525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    output.append("</").append(elementName).append(">");
2545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void text(String text) {
2575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    try {
2585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      writeText(text);
2595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } catch (IOException ex) {
2605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      ioExHandler.handle(ex);
2615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final void writeText(String text) throws IOException {
2655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
2665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
2670df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      pendingUnescaped.append(text);
2685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } else {
2690df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel      Encoding.encodeHtmlOnto(text, output);  // Works for RCDATA.
2705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2738560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel  private static int checkHtmlCdataCloseable(
2748560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel      String localName, StringBuilder sb) {
2758560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel    int escapingTextSpanStart = -1;
2768560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel    for (int i = 0, n = sb.length(); i < n; ++i) {
2778560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel      char ch = sb.charAt(i);
2788560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel      switch (ch) {
2798560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel        case '<':
2808560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          if (i + 3 < n
2818560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              && '!' == sb.charAt(i + 1)
2828560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              && '-' == sb.charAt(i + 2)
2838560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              && '-' == sb.charAt(i + 3)) {
2848560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            if (escapingTextSpanStart == -1) {
2858560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              escapingTextSpanStart = i;
2868560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            } else {
2878560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              return i;
2888560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            }
2898560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          } else if (i + 1 + localName.length() < n
2908560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel                     && '/' == sb.charAt(i + 1)
2918560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel                     && Strings.regionMatchesIgnoreCase(
2928560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel                         sb, i + 2, localName, 0, localName.length())) {
2938560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            // A close tag contained in the content.
2948560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            if (escapingTextSpanStart < 0) {
2958560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // We could try some recovery strategies here.
2968560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // E.g. prepending "/<!--\n" to sb if "script".equals(localName)
2978560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              return i;
2988560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            }
2998560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            if (!"script".equals(localName)) {
3008560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // Script tags are commonly included inside script tags.
3018560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // <script><!--document.write('<script>f()</script>');--></script>
3028560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // but this does not happen in other CDATA element types.
3038560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // Actually allowing an end tag inside others is problematic.
3048560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // Specifically,
3058560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // <style><!--</style>-->/* foo */</style>
3068560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              // displays the text "/* foo */" on some browsers.
3078560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel              return i;
3088560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            }
3095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          }
3108560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          break;
3118560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel        case '>':
3128560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          // From the HTML5 spec:
3138560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          //    The text in style, script, title, and textarea elements must not
3148560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          //    have an escaping text span start that is not followed by an
3158560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          //    escaping text span end.
3168560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          // We look left since the HTML 5 spec allows the escaping text span
3178560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          // end to share dashes with the start.
3188560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          if (i >= 2 && '-' == sb.charAt(i - 1) && '-' == sb.charAt(i - 2)) {
3198560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            if (escapingTextSpanStart < 0) { return i - 2; }
3208560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel            escapingTextSpanStart = -1;
3218560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          }
3228560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel          break;
323489a0ec7301a86af8497d24748336db09ca278damikesamuel        default:
324489a0ec7301a86af8497d24748336db09ca278damikesamuel          break;
3255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
3265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
3278560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel    if (escapingTextSpanStart >= 0) {
3288560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel      // We could try recovery strategies here.
3298560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel      // E.g. appending "//-->" to the buffer if "script".equals(localName)
3308560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel      return escapingTextSpanStart;
3318560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel    }
3328560af5e2982092cb27cce62aa9cfa5bb45ea387mikesamuel    return -1;
3335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
3345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  @VisibleForTesting
3375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  static boolean isValidHtmlName(String name) {
3385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    int n = name.length();
3395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (n == 0) { return false; }
3405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (n > 128) { return false; }
3415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    boolean isNamespaced = false;
3425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    for (int i = 0; i < n; ++i) {
3435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      char ch = name.charAt(i);
3445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      switch (ch) {
3455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case ':':
3465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (isNamespaced) { return false; }
3475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          isNamespaced = true;
3484e867904c8295537803c1c8a076e130df5674b58mikesamuel          if (i == 0 || i + 1 == n) { return false; }
3494e867904c8295537803c1c8a076e130df5674b58mikesamuel          break;
3505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '-':
3515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (i == 0 || i + 1 == n) { return false; }
3525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
3535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        default:
3545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (ch <= '9') {
3555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            if (i == 0 || ch < '0') { return false; }
3565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          } else if ('A' <= ch && ch <= 'z') {
3575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            if ('Z' < ch && ch < 'a') { return false; }
3585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          } else {
3595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            return false;
3605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          }
3615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
3625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
3635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
3645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    return true;
3655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
3665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3671bfae835221847e7791625e2baa98a60eb3cfa8amikesamuel  /**
368b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel   * Canonicalizes the element name and possibly substitutes an alternative
369b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel   * that has more consistent semantics.
3700df9131f7be5c0f90ce70d43b7e4239a6a6df016mikesamuel   */
371b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel  static String safeName(String elementName) {
372b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel    elementName = HtmlLexer.canonicalName(elementName);
373b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel
374b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel    // Substitute a reliably non-raw-text element for raw-text and
375b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel    // plain-text elements.
376b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel    switch (elementName.length()) {
377b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel      case 3:
378b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel        if ("xmp".equals(elementName)) { return "pre"; }
379b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel        break;
380b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel      case 7:
381b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel        if ("listing".equals(elementName)) { return "pre"; }
382b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel        break;
383b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel      case 9:
384b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel        if ("plaintext".equals(elementName)) { return "pre"; }
385b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel        break;
386b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel    }
387b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel    return elementName;
388b530bfd7496ead9ab962726781dd90b6c739cdcemikesamuel  }
3895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  static class CloseableHtmlStreamRenderer extends HtmlStreamRenderer
3915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      implements Closeable {
3925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    private final Closeable closeable;
3935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    CloseableHtmlStreamRenderer(
3954e867904c8295537803c1c8a076e130df5674b58mikesamuel        @WillCloseWhenClosed
3965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        Appendable output, Handler<? super IOException> errorHandler,
3975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        Handler<? super String> badHtmlHandler) {
3985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      super(output, errorHandler, badHtmlHandler);
3995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      this.closeable = (Closeable) output;
4005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
4015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
4025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    public void close() throws IOException {
4035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if (isDocumentOpen()) { closeDocument(); }
4045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      closeable.close();
4055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
4065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
4075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com}
408