/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.clearsilver.jsilver.syntax; import com.google.clearsilver.jsilver.syntax.analysis.DepthFirstAdapter; import com.google.clearsilver.jsilver.syntax.node.AAltCommand; import com.google.clearsilver.jsilver.syntax.node.ACallCommand; import com.google.clearsilver.jsilver.syntax.node.ADataCommand; import com.google.clearsilver.jsilver.syntax.node.ADefCommand; import com.google.clearsilver.jsilver.syntax.node.AEachCommand; import com.google.clearsilver.jsilver.syntax.node.AEscapeCommand; import com.google.clearsilver.jsilver.syntax.node.AEvarCommand; import com.google.clearsilver.jsilver.syntax.node.AIfCommand; import com.google.clearsilver.jsilver.syntax.node.ALoopCommand; import com.google.clearsilver.jsilver.syntax.node.ALoopIncCommand; import com.google.clearsilver.jsilver.syntax.node.ALoopToCommand; import com.google.clearsilver.jsilver.syntax.node.ALvarCommand; import com.google.clearsilver.jsilver.syntax.node.ANameCommand; import com.google.clearsilver.jsilver.syntax.node.ANoopCommand; import com.google.clearsilver.jsilver.syntax.node.ASetCommand; import com.google.clearsilver.jsilver.syntax.node.AUvarCommand; import com.google.clearsilver.jsilver.syntax.node.AVarCommand; import com.google.clearsilver.jsilver.syntax.node.AWithCommand; import com.google.clearsilver.jsilver.syntax.node.Start; import com.google.clearsilver.jsilver.syntax.node.TData; import java.util.ArrayList; 
import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Detects sequences of commands corresponding to a line in the template containing only structural * commands, comments or whitespace and rewrites the syntax tree to effectively remove any data * (text) associated with that line (including the trailing whitespace). *
* A structural command is any command that never emits any output. These come in three types: *
* This makes it much easier to write human readable templates in cases where the output format is * whitespace sensitive. *
* Thus the input: * *
 * <pre>
 * {@literal
 * ----------------
 * Value is:
 * <?cs if:x>0 ?>
 *   positive
 * <?cs elif:x<0 ?>
 *   negative
 * <?cs else ?>
 *   zero
 * <?cs /if ?>.
 * ----------------
 * }
 * </pre>
 * is equivalent to:
 *
 * <pre>
 * {@literal
 * ----------------
 * Value is:
 * <?cs if:x>0 ?>  positive
 * <?cs elif:x<0 ?>  negative
 * <?cs else ?>  zero
 * <?cs /if ?>.
 * ----------------
 * }
 * </pre>
 * but is much easier to read.
 *
* Where data commands become empty they are replaced with Noop commands, which effectively removes * them from the tree. These can be removed (if needed) by a later optimization step but shouldn't * cause any issues. */ public class StructuralWhitespaceStripper extends DepthFirstAdapter { /** * A regex snippet to match sequences of inline whitespace. The easiest way to define this is as * "not (non-space or newline)". */ private static final String IWS = "[^\\S\\n]*"; /** Pattern to match strings that consist only of inline whitespace. */ private static final Pattern INLINE_WHITESPACE = Pattern.compile(IWS); /** * Pattern to match strings that start with arbitrary (inline) whitespace, followed by a newline. */ private static final Pattern STARTS_WITH_NEWLINE = Pattern.compile("^" + IWS + "\\n"); /** * Pattern to match strings that end with a newline, followed by trailing (inline) whitespace. */ private static final Pattern ENDS_WITH_NEWLINE = Pattern.compile("\\n" + IWS + "$"); /** * Pattern to capture the content of a string after a leading newline. Only ever used on input * that previously matched STARTS_WITH_NEWLINE. */ private static final Pattern LEADING_WHITESPACE_AND_NEWLINE = Pattern.compile("^" + IWS + "\\n(.*)$", Pattern.DOTALL); /** * Pattern to capture the content of a string before a trailing newline. Note that this may have * to match text that has already had the final newline removed so we must greedily match the * whitespace rather than the content. */ private static final Pattern TRAILING_WHITESPACE = Pattern.compile("^(.*?)" + IWS + "$", Pattern.DOTALL); /** * Flag to tell us if we are in whitespace chomping mode. By default we start in this mode because * the content of the first line in a template is not preceded by a newline (but should behave as * if it was). Once this flag has been set to false, it remains unset until a new line is * encountered. *
* Note that we only actually remove whitespace when we find the terminating condition rather than * when we visit the nodes (i.e., this mode can be aborted and any visited whitespace will be left * untouched). */ private boolean maybeChompWhitespace = true; /** * Flag to tell us if the line we are processing has an inline command in it. *
* An inline command is a complex command (eg. 'if', 'loop') where both the start and end of the * command exists on the same line. Non-complex commands (eg. 'var', 'name') cannot be considered * inline. *
* This flag is set when we process the start of a complex command and unset when we finish * processing a line. Thus if the flag is still true when we encounter the end of a complex * command, it tells us that (at least one) complex command was entirely contained within the * current line and that we should stop chomping whitespace for the current line. *
* This means we can detect input such as: * *
* {@literal } ** for which the trailing newline and surrounding whitespace should not be removed, as opposed to: * *
* {@literal * something * * } ** where the trailing newlines for both the opening and closing of the 'if' command should be * removed. */ private boolean currentLineContainsInlineComplexCommand = false; /** * First data command we saw when we started 'chomping' whitespace (note that this can be null if * we are at the beginning of a file or when we have chomped a previous data command down to * nothing). */ private ADataCommand firstChompedData = null; /** * Intermediate whitespace-only data commands that we may need to remove. *
* This list is built up as we visit commands and is either processed when we need to remove
* structural whitespace or cleared if we encounter situations that prohibit whitespace removal.
*/
private List
* The given command can be null at the beginning of the file or if the original data command was
* entirely consumed by a previous strip operation (remember that data commands can be processed
* twice, at both the start and end of a whitespace sequence).
*/
private static ADataCommand stripLeadingWhitespaceAndNewline(ADataCommand data) {
  // A null command means there is nothing to strip; pass the null straight back.
  if (data == null) {
    return null;
  }

  // Delegate the actual text manipulation to the String overload.
  final String remaining = stripLeadingWhitespaceAndNewline(data.getData().getText());

  if (!remaining.isEmpty()) {
    // Some text survived, so keep the command and swap in the stripped text.
    data.setData(new TData(remaining));
    return data;
  }

  // Nothing is left of the text: replace the command in the tree with a Noop and report
  // "chomped to nothing" to the caller by returning null.
  data.replaceBy(new ANoopCommand());
  return null;
}
/**
 * Removes whitespace from the end of the text held by the given data command and stores the
 * result back on the command. If no text remains, the command is replaced in the tree by a Noop
 * command instead. A null command is ignored.
 */
private static void stripTrailingWhitespace(ADataCommand data) {
  if (data == null) {
    return;
  }

  // Delegate the actual text manipulation to the String overload.
  final String remaining = stripTrailingWhitespace(data.getData().getText());

  if (remaining.isEmpty()) {
    // The command no longer carries any text, so swap it for a Noop.
    data.replaceBy(new ANoopCommand());
    return;
  }
  data.setData(new TData(remaining));
}
/**
* Removes all data commands collected while chomping the current line and clears the given list.
*/
private static void removeWhitespace(List