// HtmlStreamRenderer.java revision 4e867904c8295537803c1c8a076e130df5674b58
package org.owasp.html;

import com.google.common.annotations.VisibleForTesting;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import javax.annotation.WillCloseWhenClosed;

115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com/**
125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * Given a series of HTML tokens, writes valid, normalized HTML to the output.
135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The output will have well-defined tag boundaries, but there may be orphaned
145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * or missing close and open tags.
155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * The result of two renderers can always be concatenated to produce a larger
165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * snippet of HTML, but if the first was called with
175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * {@code writeOpenTag("plaintext", ...)}, then any tags in the second will not
185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com * be interpreted as tags in the concatenated version.
195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com */
205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com@TCB
215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.compublic class HtmlStreamRenderer implements HtmlStreamEventReceiver {
225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final Appendable output;
245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final Handler<? super IOException> ioExHandler;
255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final Handler<? super String> badHtmlHandler;
265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private String lastTagOpened;
275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private StringBuilder pendingUnescaped;
285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private boolean open;
295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  /**
315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param output the buffer to which HTML is streamed.
325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param ioExHandler called with any exception raised by output.
335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   */
345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public static HtmlStreamRenderer create(
354e867904c8295537803c1c8a076e130df5674b58mikesamuel      @WillCloseWhenClosed Appendable output,
364e867904c8295537803c1c8a076e130df5674b58mikesamuel      Handler<? super IOException> ioExHandler,
375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      Handler<? super String> badHtmlHandler) {
385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (output instanceof Closeable) {
395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return new CloseableHtmlStreamRenderer(
405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output, ioExHandler, badHtmlHandler);
415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } else {
425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return new HtmlStreamRenderer(output, ioExHandler, badHtmlHandler);
435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private HtmlStreamRenderer(
475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      Appendable output, Handler<? super IOException> ioExHandler,
485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      Handler<? super String> badHtmlHandler) {
495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    this.output = output;
505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    this.ioExHandler = ioExHandler;
515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    this.badHtmlHandler = badHtmlHandler;
525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public static HtmlStreamRenderer create(
555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      StringBuilder output, Handler<? super String> badHtmlHandler) {
565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    // Propagate since StringBuilder should not throw IOExceptions.
575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    return create(output, Handler.PROPAGATE, badHtmlHandler);
585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  /**
615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * Called when the series of calls make no sense.
625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * May be overridden to throw an unchecked throwable, to log, or to take some
635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * other action.
645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   *
655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param message for human consumption.
665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   * @param identifier an HTML identifier associated with the message.
675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   */
685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final void error(String message, String identifier) {
695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (ioExHandler != Handler.DO_NOTHING) {   // Avoid string append.
705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      badHtmlHandler.handle(message + " : " + identifier);
715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  /**
755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   *
765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com   */
775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void openDocument() throws IllegalStateException {
785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (open) { throw new IllegalStateException(); }
795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    open = true;
805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void closeDocument() throws IllegalStateException {
835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
845c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
855c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      closeTag(lastTagOpened);
865c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
875c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    open = false;
885c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (output instanceof Flushable) {
895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      try {
905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        ((Flushable) output).flush();
915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      } catch (IOException ex) {
925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        ioExHandler.handle(ex);
935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final boolean isDocumentOpen() {
985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    return open;
995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void openTag(String elementName, List<String> attrs) {
1025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    try {
1035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      writeOpenTag(elementName, attrs);
1045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } catch (IOException ex) {
1055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      ioExHandler.handle(ex);
1065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private void writeOpenTag(String elementName, List<? extends String> attrs)
1105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      throws IOException {
1115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
1125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    elementName = HtmlLexer.canonicalName(elementName);
1135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!isValidHtmlName(elementName)) {
1145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      error("Invalid element name", elementName);
1155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return;
1165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
1185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      error("Tag content cannot appear inside CDATA element", elementName);
1195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return;
1205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    switch (HtmlTextEscapingMode.getModeForTag(elementName)) {
1235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      case CDATA:
1245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      case CDATA_SOMETIMES:
1255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      case PLAIN_TEXT:
1265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        lastTagOpened = elementName;
1275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        pendingUnescaped = new StringBuilder();
1285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        break;
1295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      default:
1305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    output.append('<').append(elementName);
1335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    for (Iterator<? extends String> attrIt = attrs.iterator();
1355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com         attrIt.hasNext();) {
1365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      String name = attrIt.next();
1375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      String value = attrIt.next();
1385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      name = HtmlLexer.canonicalName(name);
1395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if (!isValidHtmlName(name)) {
1405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        error("Invalid attr name", name);
1415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        continue;
1425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
1435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      output.append(' ').append(name).append('=').append('"');
1445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      escapeHtmlOnto(value, output);
1455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      output.append('"');
1465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    output.append('>');
1495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void closeTag(String elementName) {
1525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    try {
1535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      writeCloseTag(elementName);
1545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } catch (IOException ex) {
1555c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      ioExHandler.handle(ex);
1565c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final void writeCloseTag(String elementName)
1605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      throws IOException {
1615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
1625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    elementName = HtmlLexer.canonicalName(elementName);
1635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!isValidHtmlName(elementName)) {
1645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      error("Invalid element name", elementName);
1655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      return;
1665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
1695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if (!lastTagOpened.equals(elementName)) {
1705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        error("Tag content cannot appear inside CDATA element", elementName);
1715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        return;
1725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      } else {
1735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        String unescaped = pendingUnescaped.toString();
1745c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        pendingUnescaped = null;
1755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        if (!containsCloseTag(unescaped, lastTagOpened)) {
1765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output.append(unescaped);
1775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        } else {
1785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          error("Unescaped text content contains close tag", elementName);
1795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          // Still output the close tag.
1805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        }
1815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
1825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if ("plaintext".equals(elementName)) { return; }
1835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1845c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    output.append("</").append(elementName).append(">");
1855c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1865c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1875c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  public final void text(String text) {
1885c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    try {
1895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      writeText(text);
1905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } catch (IOException ex) {
1915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      ioExHandler.handle(ex);
1925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
1935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
1945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
1955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private final void writeText(String text) throws IOException {
1965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (!open) { throw new IllegalStateException(); }
1975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (pendingUnescaped != null) {
1985c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      pendingUnescaped.append(text.replaceAll("\0", ""));
1995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    } else {
2005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      escapeHtmlOnto(text, output);  // Works for RCDATA.
2015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  private static boolean containsCloseTag(String unescaped, String tagName) {
2055c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    boolean allowEscapingTextSpan = HtmlTextEscapingMode.allowsEscapingTextSpan(
2065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        tagName);
2075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    int unescapedLength = unescaped.length();
2095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    int tagNameLength = tagName.length();
2105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    int limit = unescapedLength - tagName.length() - 2;
2115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    for (int i = -1; (i = unescaped.indexOf('<', i + 1)) >= 0;) {
2125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if (i <= limit && '/' == unescaped.charAt(i + 1)
2135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          && Strings.regionMatchesIgnoreCase(
2145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com              unescaped, i + 2, tagName, 0, tagNameLength)) {
2155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        // Content cannot be embedded.
2165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        return true;
2175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      } else if (allowEscapingTextSpan && i + 4 <= unescapedLength
2185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com                 && '!' == unescaped.charAt(i + 1)
2195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com                 && '-' == unescaped.charAt(i + 2)
2205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com                 && '-' == unescaped.charAt(i + 3)) {
2215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        // HTML 5 allows the end of an escaping text span to share dashes with
2225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        // the open : <!--> and <!---> are both fully formed.
2235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        if (i + 4 < unescapedLength && unescaped.charAt(i + 4) == '>') {
2245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          i = i + 5;
2255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        } else if (i + 5 < unescapedLength
2265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com                   && unescaped.charAt(i + 4) == '-'
2275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com                   && unescaped.charAt(i + 5) == '>') {
2285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          i = i + 6;
2295c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        } else {
2305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          i = unescaped.indexOf("-->", i + 4);
2315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (i < 0) {
2325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            // If the escaping text span is not closed, then final close tag
2335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            // would be covered by the unclosed escaping text span.
2345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            return true;
2355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          }
2365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        }
2375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
2385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    return false;
2405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2425c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2435c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  @VisibleForTesting
2445c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  static boolean isValidHtmlName(String name) {
2455c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    int n = name.length();
2465c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (n == 0) { return false; }
2475c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    if (n > 128) { return false; }
2485c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    boolean isNamespaced = false;
2495c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    for (int i = 0; i < n; ++i) {
2505c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      char ch = name.charAt(i);
2515c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      switch (ch) {
2525c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case ':':
2535c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (isNamespaced) { return false; }
2545c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          isNamespaced = true;
2554e867904c8295537803c1c8a076e130df5674b58mikesamuel          if (i == 0 || i + 1 == n) { return false; }
2564e867904c8295537803c1c8a076e130df5674b58mikesamuel          break;
2575c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '-':
2585c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (i == 0 || i + 1 == n) { return false; }
2595c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
2605c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        default:
2615c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (ch <= '9') {
2625c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            if (i == 0 || ch < '0') { return false; }
2635c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          } else if ('A' <= ch && ch <= 'z') {
2645c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            if ('Z' < ch && ch < 'a') { return false; }
2655c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          } else {
2665c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            return false;
2675c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          }
2685c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
2695c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
2705c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
2715c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    return true;
2725c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
2735c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
2743a3d912deec6a8382422b602031d12fee7d4c73amikesamuel  @SuppressWarnings("fallthrough")
2755c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  static void escapeHtmlOnto(String plainText, Appendable output)
2765c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      throws IOException {
2775c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    int n = plainText.length();
2785c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    int pos = 0;
2795c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    for (int i = 0; i < n; ++i) {
2805c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      char ch = plainText.charAt(i);
2815c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      switch (ch) {
2825c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '<':
2835c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output.append(plainText, pos, i).append("&lt;");
2845c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          pos = i + 1;
2855c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
2865c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '>':
2875c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output.append(plainText, pos, i).append("&gt;");
2885c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          pos = i + 1;
2895c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
2905c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '&':
2915c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output.append(plainText, pos, i).append("&amp;");
2925c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          pos = i + 1;
2935c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
2945c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '"':
2955c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output.append(plainText, pos, i).append("&#34;");
2965c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          pos = i + 1;
2975c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
2984e867904c8295537803c1c8a076e130df5674b58mikesamuel        case '\r': case '\n': break;
2995c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        default:
3005c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          if (0x20 <= ch && ch < 0xff00) {
3015c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com            continue;
3025c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          }
3035c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          // Is a control character or possible full-width version of a
3045c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          // special character.
3050f3a7565157c70edb1935f04888fdc0407397fabmikesamuel          // FALL-THROUGH
3065c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '+':  // UTF-7
3075c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '=':  // Special in attributes.
3085c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '@':  // Conditional compilation
3095c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case '\'': case '`':  // Quoting character
3105c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output.append(plainText, pos, i).append("&#")
3115c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com              .append(String.valueOf((int) ch)).append(';');
3125c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          pos = i + 1;
3135c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
3145c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        case 0:
3155c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          output.append(plainText, pos, i);
3165c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          pos = i + 1;
3175c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com          break;
3185c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      }
3195c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
3205c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    output.append(plainText, pos, n);
3215c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
3225c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3235c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3245c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  static class CloseableHtmlStreamRenderer extends HtmlStreamRenderer
3255c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      implements Closeable {
3265c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    private final Closeable closeable;
3275c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3285c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    CloseableHtmlStreamRenderer(
3294e867904c8295537803c1c8a076e130df5674b58mikesamuel        @WillCloseWhenClosed
3305c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        Appendable output, Handler<? super IOException> errorHandler,
3315c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com        Handler<? super String> badHtmlHandler) {
3325c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      super(output, errorHandler, badHtmlHandler);
3335c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      this.closeable = (Closeable) output;
3345c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
3355c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com
3365c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    public void close() throws IOException {
3375c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      if (isDocumentOpen()) { closeDocument(); }
3385c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com      closeable.close();
3395c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com    }
3405c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com  }
3415c702c12be71d8070da9287cc4a044617dd726a7manico.james@gmail.com}
342