/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.clearsilver.jsilver.template; import java.io.IOException; /** * HTML whitespace stripper to be used by JSilver. It removes leading and * trailing whitespace, it replaces contiguous whitespace characters with just * the first character, and removes lines of nothing but whitespace. * * It does not strip whitespace inside the following elements: * PRE, TEXTAREA and verbatim sections. (NOTE(review): the original element * list was lost from this comment; these names are inferred from the * preScope, textAreaScope and verbatimScope counters below -- confirm.) * * It also strips out empty lines and leading whitespace inside HTML tags (i.e. * between '<' and '>') and inside SCRIPT elements. It leaves trailing * whitespace since that is more costly to remove and tends to not be common * based on how templates are created (they don't have trailing whitespace). *

* Loadtests indicate that this class can strip whitespace almost as quickly * as just reading every character from a string (20% slower). *

* While not strictly compatible with the JNI Clearsilver whitestripping * function, we are not aware of any differences that yield functionally * different HTML output. However, we encourage users to verify for themselves * and report any differences. */ public class HtmlWhiteSpaceStripper implements Appendable { // Object to output stripped content to. private final Appendable out; // Level of whitespace stripping to perform. (Currently not used). // TODO: Determine what the exact differences are in levels in // JNI Clearsilver and see if it is worth porting it. private final int level; // Has any non-whitespace character been seen since the start of the line. private boolean nonWsSeen = false; // Was there previously one or more whitespace chars? If so, we should output // the first whitespace char in the sequence before any other non-whitespace // character. 0 signifies no pending whitespace. private char pendingWs = 0; // We just saw the start of an HTML tag '<'. private boolean startHtmlTag = false; // Are we currently in an opening HTML tag (not " tags private int textAreaScope = 0; // Are we between

  // <pre> tags
  private int preScope = 0;
  // Are we between verbatim flags
  private int verbatimScope = 0;
  // Are we between