StructuralWhitespaceStripper.java revision 56ed4167b942ec265f9cee70ac4d71d10b3835ce
1/*
2 * Copyright (C) 2010 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.clearsilver.jsilver.syntax;
18
19import com.google.clearsilver.jsilver.syntax.analysis.DepthFirstAdapter;
20import com.google.clearsilver.jsilver.syntax.node.AAltCommand;
21import com.google.clearsilver.jsilver.syntax.node.ACallCommand;
22import com.google.clearsilver.jsilver.syntax.node.ADataCommand;
23import com.google.clearsilver.jsilver.syntax.node.ADefCommand;
24import com.google.clearsilver.jsilver.syntax.node.AEachCommand;
25import com.google.clearsilver.jsilver.syntax.node.AEscapeCommand;
26import com.google.clearsilver.jsilver.syntax.node.AEvarCommand;
27import com.google.clearsilver.jsilver.syntax.node.AIfCommand;
28import com.google.clearsilver.jsilver.syntax.node.ALoopCommand;
29import com.google.clearsilver.jsilver.syntax.node.ALoopIncCommand;
30import com.google.clearsilver.jsilver.syntax.node.ALoopToCommand;
31import com.google.clearsilver.jsilver.syntax.node.ALvarCommand;
32import com.google.clearsilver.jsilver.syntax.node.ANameCommand;
33import com.google.clearsilver.jsilver.syntax.node.ANoopCommand;
34import com.google.clearsilver.jsilver.syntax.node.ASetCommand;
35import com.google.clearsilver.jsilver.syntax.node.AUvarCommand;
36import com.google.clearsilver.jsilver.syntax.node.AVarCommand;
37import com.google.clearsilver.jsilver.syntax.node.AWithCommand;
38import com.google.clearsilver.jsilver.syntax.node.Start;
39import com.google.clearsilver.jsilver.syntax.node.TData;
40
41import java.util.ArrayList;
42import java.util.List;
43import java.util.regex.Matcher;
44import java.util.regex.Pattern;
45
46/**
47 * Detects sequences of commands corresponding to a line in the template containing only structural
48 * commands, comments or whitespace and rewrites the syntax tree to effectively remove any data
49 * (text) associated with that line (including the trailing whitespace).
50 * <p>
51 * A structural command is any command that never emits any output. These come in three types:
52 * <ul>
53 * <li>Commands that can contain other commands (eg, "alt", "each", "escape", "if", "loop", "with",
54 * etc...).
55 * <li>Commands that operate on the template itself (eg, "include", "autoescape", etc...).
56 * <li>Comments.
57 * </ul>
58 * <p>
59 * This makes it much easier to write human readable templates in cases where the output format is
60 * whitespace sensitive.
61 * <p>
62 * Thus the input:
63 *
64 * <pre>
65 * {@literal
66 * ----------------
67 * Value is:
68 * <?cs if:x>0 ?>
69 *   positive
70 * <?cs elif:x<0 ?>
71 *   negative
72 * <?cs else ?>
73 *   zero
74 * <?cs /if ?>.
75 * ----------------
76 * }
77 * </pre>
78 * is equivalent to:
79 *
80 * <pre>
81 * {@literal
82 * ----------------
83 * Value is:
84 * <?cs if:x>0 ?>  positive
85 * <?cs elif:x<0 ?>  negative
86 * <?cs else ?>  zero
87 * <?cs /if ?>.
88 * ----------------
89 * }
90 * </pre>
91 * but is much easier to read.
92 * <p>
93 * Where data commands become empty they are replaced with Noop commands, which effectively removes
94 * them from the tree. These can be removed (if needed) by a later optimization step but shouldn't
95 * cause any issues.
96 */
97public class StructuralWhitespaceStripper extends DepthFirstAdapter {
98  /**
99   * A regex snippet to match sequences of inline whitespace. The easiest way to define this is as
100   * "not (non-space or newline)".
101   */
102  private static final String IWS = "[^\\S\\n]*";
103
104  /** Pattern to match strings that consist only of inline whitespace. */
105  private static final Pattern INLINE_WHITESPACE = Pattern.compile(IWS);
106
107  /**
108   * Pattern to match strings that start with arbitrary (inline) whitespace, followed by a newline.
109   */
110  private static final Pattern STARTS_WITH_NEWLINE = Pattern.compile("^" + IWS + "\\n");
111
112  /**
113   * Pattern to match strings that end with a newline, followed by trailing (inline) whitespace.
114   */
115  private static final Pattern ENDS_WITH_NEWLINE = Pattern.compile("\\n" + IWS + "$");
116
117  /**
118   * Pattern to capture the content of a string after a leading newline. Only ever used on input
119   * that previously matched STARTS_WITH_NEWLINE.
120   */
121  private static final Pattern LEADING_WHITESPACE_AND_NEWLINE =
122      Pattern.compile("^" + IWS + "\\n(.*)$", Pattern.DOTALL);
123
124  /**
125   * Pattern to capture the content of a string before a trailing newline. Note that this may have
126   * to match text that has already had the final newline removed so we must greedily match the
127   * whitespace rather than the content.
128   */
129  private static final Pattern TRAILING_WHITESPACE =
130      Pattern.compile("^(.*?)" + IWS + "$", Pattern.DOTALL);
131
132  /**
133   * Flag to tell us if we are in whitespace chomping mode. By default we start in this mode because
134   * the content of the first line in a template is not preceded by a newline (but should behave as
135   * if it was). Once this flag has been set to false, it remains unset until a new line is
136   * encountered.
137   * <p>
138   * Note that we only actually remove whitespace when we find the terminating condition rather than
139   * when as visit the nodes (ie, this mode can be aborted and any visited whitespace will be left
140   * untouched).
141   */
142  private boolean maybeChompWhitespace = true;
143
144  /**
145   * Flag to tell us if the line we are processing has an inline command in it.
146   * <p>
147   * An inline command is a complex command (eg. 'if', 'loop') where both the start and end of the
148   * command exists on the same line. Non-complex commands (eg. 'var', 'name') cannot be considered
149   * inline.
150   * <p>
151   * This flag is set when we process the start of a complex command and unset when we finish
152   * processing a line. Thus if the flag is still true when we encounter the end of a complex
153   * command, it tells us that (at least one) complex command was entirely contained within the
154   * current line and that we should stop chomping whitespace for the current line.
155   * <p>
156   * This means we can detect input such as:
157   *
158   * <pre>
159   * {@literal <?cs if:x?>   <?cs /if?>}
160   * </pre>
161   * for which the trailing newline and surrounding whitespace should not be removed, as opposed to:
162   *
163   * <pre>
164   * {@literal <?cs if:x?>
165   *    something
166   *  <?cs /if?>
167   * }
168   * </pre>
169   * where the trailing newlines for both the opening and closing of the 'if' command should be
170   * removed.
171   */
172  private boolean currentLineContainsInlineComplexCommand = false;
173
174  /**
175   * First data command we saw when we started 'chomping' whitespace (note that this can be null if
176   * we are at the beginning of a file or when we have chomped a previous data command down to
177   * nothing).
178   */
179  private ADataCommand firstChompedData = null;
180
181  /**
182   * Intermediate whitespace-only data commands that we may need to remove.
183   * <p>
184   * This list is built up as we visit commands and is either processed when we need to remove
185   * structural whitespace or cleared if we encounter situations that prohibit whitespace removal.
186   */
187  private List<ADataCommand> whitespaceData = new ArrayList<ADataCommand>();
188
189  private static boolean isInlineWhitespace(String text) {
190    return INLINE_WHITESPACE.matcher(text).matches();
191  }
192
193  private static boolean startsWithNewline(String text) {
194    return STARTS_WITH_NEWLINE.matcher(text).find();
195  }
196
197  private static boolean endsWithNewline(String text) {
198    return ENDS_WITH_NEWLINE.matcher(text).find();
199  }
200
201  /**
202   * Removes leading whitespace (including first newline) from the given string. The text must start
203   * with optional whitespace followed by a newline.
204   */
205  private static String stripLeadingWhitespaceAndNewline(String text) {
206    Matcher matcher = LEADING_WHITESPACE_AND_NEWLINE.matcher(text);
207    if (!matcher.matches()) {
208      throw new IllegalStateException("Text '" + text + "' should have leading whitespace/newline.");
209    }
210    return matcher.group(1);
211  }
212
213  /**
214   * Removes trailing whitespace (if present) from the given string.
215   */
216  private static String stripTrailingWhitespace(String text) {
217    Matcher matcher = TRAILING_WHITESPACE.matcher(text);
218    if (!matcher.matches()) {
219      // The trailing whitespace regex should never fail to match a string.
220      throw new AssertionError("Error in regular expression");
221    }
222    return matcher.group(1);
223  }
224
225  /**
226   * Remove whitespace (including first newline) from the start of the given data command (replacing
227   * it with a Noop command if it becomes empty). Returns a modified data command, or null if all
228   * text was removed.
229   * <p>
230   * The given command can be null at the beginning of the file or if the original data command was
231   * entirely consumed by a previous strip operation (remember that data commands can be processed
232   * twice, at both the start and end of a whitespace sequence).
233   */
234  private static ADataCommand stripLeadingWhitespaceAndNewline(ADataCommand data) {
235    if (data != null) {
236      String text = stripLeadingWhitespaceAndNewline(data.getData().getText());
237      if (text.isEmpty()) {
238        data.replaceBy(new ANoopCommand());
239        // Returning null just means we have chomped the whitespace to nothing.
240        data = null;
241      } else {
242        data.setData(new TData(text));
243      }
244    }
245    return data;
246  }
247
248  /**
249   * Removes whitespace from the end of the given data command (replacing it with a Noop command if
250   * it becomes empty).
251   */
252  private static void stripTrailingWhitespace(ADataCommand data) {
253    if (data != null) {
254      String text = stripTrailingWhitespace(data.getData().getText());
255      if (text.isEmpty()) {
256        data.replaceBy(new ANoopCommand());
257      } else {
258        data.setData(new TData(text));
259      }
260    }
261  }
262
263  /**
264   * Removes all data commands collected while chomping the current line and clears the given list.
265   */
266  private static void removeWhitespace(List<ADataCommand> whitespaceData) {
267    for (ADataCommand data : whitespaceData) {
268      data.replaceBy(new ANoopCommand());
269    }
270    whitespaceData.clear();
271  }
272
273  @Override
274  public void caseStart(Start node) {
275    // Process the hierarchy.
276    super.caseStart(node);
277    // We might end after processing a non-data node, so deal with any
278    // unprocessed whitespace before we exit.
279    if (maybeChompWhitespace) {
280      stripTrailingWhitespace(firstChompedData);
281      removeWhitespace(whitespaceData);
282      firstChompedData = null;
283    }
284    // Verify we have consumed (and cleared) any object references.
285    if (firstChompedData != null) {
286      throw new IllegalStateException("Unexpected first data node.");
287    }
288    if (!whitespaceData.isEmpty()) {
289      throw new IllegalStateException("Unexpected data nodes.");
290    }
291  }
292
293  @Override
294  public void caseADataCommand(ADataCommand data) {
295    final String originalText = data.getData().getText();
296    if (maybeChompWhitespace) {
297      if (isInlineWhitespace(originalText)) {
298        // This data command is whitespace between two commands on the same
299        // line, simply chomp it and continue ("Om-nom-nom").
300        whitespaceData.add(data);
301        return;
302      }
303      if (startsWithNewline(originalText)) {
304        // This data command is at the end of a line that contains only
305        // structural commands and whitespace. We remove all whitespace
306        // associated with this line by:
307        // * Stripping whitespace from the end of the data command at the start
308        // of this line.
309        // * Removing all intermediate (whitespace only) data commands.
310        // * Stripping whitespace from the start of the current data command.
311        stripTrailingWhitespace(firstChompedData);
312        removeWhitespace(whitespaceData);
313        data = stripLeadingWhitespaceAndNewline(data);
314        currentLineContainsInlineComplexCommand = false;
315      } else {
316        // This data command contains some non-whitespace text so we must abort
317        // the chomping of this line and output it normally.
318        abortWhitespaceChompingForCurrentLine();
319      }
320    }
321    // Test to see if we should start chomping on the next line.
322    maybeChompWhitespace = endsWithNewline(originalText);
323    // Note that data can be null here if we stripped all the whitespace from
324    // it (which means that firstChompedData can be null next time around).
325    firstChompedData = maybeChompWhitespace ? data : null;
326  }
327
328  /**
329   * Helper method to abort whitespace processing for the current line. This method is idempotent on
330   * a per line basis, and once it has been called the state is only reset at the start of the next
331   * line.
332   */
333  private void abortWhitespaceChompingForCurrentLine() {
334    maybeChompWhitespace = false;
335    currentLineContainsInlineComplexCommand = false;
336    whitespaceData.clear();
337  }
338
339  // ---- Inline commands that prohibit whitespace removal. ----
340
341  @Override
342  public void inAAltCommand(AAltCommand node) {
343    abortWhitespaceChompingForCurrentLine();
344  }
345
346  @Override
347  public void inACallCommand(ACallCommand node) {
348    abortWhitespaceChompingForCurrentLine();
349  }
350
351  @Override
352  public void inAEvarCommand(AEvarCommand node) {
353    abortWhitespaceChompingForCurrentLine();
354  }
355
356  @Override
357  public void inALvarCommand(ALvarCommand node) {
358    abortWhitespaceChompingForCurrentLine();
359  }
360
361  @Override
362  public void inANameCommand(ANameCommand node) {
363    abortWhitespaceChompingForCurrentLine();
364  }
365
366  @Override
367  public void inASetCommand(ASetCommand node) {
368    abortWhitespaceChompingForCurrentLine();
369  }
370
371  @Override
372  public void inAUvarCommand(AUvarCommand node) {
373    abortWhitespaceChompingForCurrentLine();
374  }
375
376  @Override
377  public void inAVarCommand(AVarCommand node) {
378    abortWhitespaceChompingForCurrentLine();
379  }
380
381  // ---- Two part (open/close) commands that can have child commands. ----
382
383  public void enterComplexCommand() {
384    currentLineContainsInlineComplexCommand = true;
385  }
386
387  public void exitComplexCommand() {
388    if (currentLineContainsInlineComplexCommand) {
389      abortWhitespaceChompingForCurrentLine();
390    }
391  }
392
393  @Override
394  public void caseAAltCommand(AAltCommand node) {
395    enterComplexCommand();
396    super.caseAAltCommand(node);
397    exitComplexCommand();
398  }
399
400  @Override
401  public void caseADefCommand(ADefCommand node) {
402    enterComplexCommand();
403    super.caseADefCommand(node);
404    exitComplexCommand();
405  }
406
407  @Override
408  public void caseAEachCommand(AEachCommand node) {
409    enterComplexCommand();
410    super.caseAEachCommand(node);
411    exitComplexCommand();
412  }
413
414  @Override
415  public void caseAEscapeCommand(AEscapeCommand node) {
416    enterComplexCommand();
417    super.caseAEscapeCommand(node);
418    exitComplexCommand();
419  }
420
421  @Override
422  public void caseAIfCommand(AIfCommand node) {
423    enterComplexCommand();
424    super.caseAIfCommand(node);
425    exitComplexCommand();
426  }
427
428  @Override
429  public void caseALoopCommand(ALoopCommand node) {
430    enterComplexCommand();
431    super.caseALoopCommand(node);
432    exitComplexCommand();
433  }
434
435  @Override
436  public void caseALoopIncCommand(ALoopIncCommand node) {
437    enterComplexCommand();
438    super.caseALoopIncCommand(node);
439    exitComplexCommand();
440  }
441
442  @Override
443  public void caseALoopToCommand(ALoopToCommand node) {
444    enterComplexCommand();
445    super.caseALoopToCommand(node);
446    exitComplexCommand();
447  }
448
449  @Override
450  public void caseAWithCommand(AWithCommand node) {
451    enterComplexCommand();
452    super.caseAWithCommand(node);
453    exitComplexCommand();
454  }
455}
456