1/* 2 * Copyright (C) 2010 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.google.clearsilver.jsilver.syntax; 18 19import com.google.clearsilver.jsilver.syntax.analysis.DepthFirstAdapter; 20import com.google.clearsilver.jsilver.syntax.node.AAltCommand; 21import com.google.clearsilver.jsilver.syntax.node.ACallCommand; 22import com.google.clearsilver.jsilver.syntax.node.ADataCommand; 23import com.google.clearsilver.jsilver.syntax.node.ADefCommand; 24import com.google.clearsilver.jsilver.syntax.node.AEachCommand; 25import com.google.clearsilver.jsilver.syntax.node.AEscapeCommand; 26import com.google.clearsilver.jsilver.syntax.node.AEvarCommand; 27import com.google.clearsilver.jsilver.syntax.node.AIfCommand; 28import com.google.clearsilver.jsilver.syntax.node.ALoopCommand; 29import com.google.clearsilver.jsilver.syntax.node.ALoopIncCommand; 30import com.google.clearsilver.jsilver.syntax.node.ALoopToCommand; 31import com.google.clearsilver.jsilver.syntax.node.ALvarCommand; 32import com.google.clearsilver.jsilver.syntax.node.ANameCommand; 33import com.google.clearsilver.jsilver.syntax.node.ANoopCommand; 34import com.google.clearsilver.jsilver.syntax.node.ASetCommand; 35import com.google.clearsilver.jsilver.syntax.node.AUvarCommand; 36import com.google.clearsilver.jsilver.syntax.node.AVarCommand; 37import com.google.clearsilver.jsilver.syntax.node.AWithCommand; 38import com.google.clearsilver.jsilver.syntax.node.Start; 39import com.google.clearsilver.jsilver.syntax.node.TData; 40 41import java.util.ArrayList; 42import java.util.List; 43import java.util.regex.Matcher; 44import java.util.regex.Pattern; 45 46/** 47 * Detects sequences of commands corresponding to a line in the template containing only structural 48 * commands, comments or whitespace and rewrites the syntax tree to effectively remove any data 49 * (text) associated with that line (including the trailing whitespace). 50 * <p> 51 * A structural command is any command that never emits any output. These come in three types: 52 * <ul> 53 * <li>Commands that can contain other commands (eg, "alt", "each", "escape", "if", "loop", "with", 54 * etc...). 55 * <li>Commands that operate on the template itself (eg, "include", "autoescape", etc...). 56 * <li>Comments. 57 * </ul> 58 * <p> 59 * This makes it much easier to write human readable templates in cases where the output format is 60 * whitespace sensitive. 61 * <p> 62 * Thus the input: 63 * 64 * <pre> 65 * {@literal 66 * ---------------- 67 * Value is: 68 * <?cs if:x>0 ?> 69 * positive 70 * <?cs elif:x<0 ?> 71 * negative 72 * <?cs else ?> 73 * zero 74 * <?cs /if ?>. 75 * ---------------- 76 * } 77 * </pre> 78 * is equivalent to: 79 * 80 * <pre> 81 * {@literal 82 * ---------------- 83 * Value is: 84 * <?cs if:x>0 ?> positive 85 * <?cs elif:x<0 ?> negative 86 * <?cs else ?> zero 87 * <?cs /if ?>. 88 * ---------------- 89 * } 90 * </pre> 91 * but is much easier to read. 92 * <p> 93 * Where data commands become empty they are replaced with Noop commands, which effectively removes 94 * them from the tree. These can be removed (if needed) by a later optimization step but shouldn't 95 * cause any issues. 96 */ 97public class StructuralWhitespaceStripper extends DepthFirstAdapter { 98 /** 99 * A regex snippet to match sequences of inline whitespace. The easiest way to define this is as 100 * "not (non-space or newline)". 101 */ 102 private static final String IWS = "[^\\S\\n]*"; 103 104 /** Pattern to match strings that consist only of inline whitespace. */ 105 private static final Pattern INLINE_WHITESPACE = Pattern.compile(IWS); 106 107 /** 108 * Pattern to match strings that start with arbitrary (inline) whitespace, followed by a newline. 109 */ 110 private static final Pattern STARTS_WITH_NEWLINE = Pattern.compile("^" + IWS + "\\n"); 111 112 /** 113 * Pattern to match strings that end with a newline, followed by trailing (inline) whitespace. 114 */ 115 private static final Pattern ENDS_WITH_NEWLINE = Pattern.compile("\\n" + IWS + "$"); 116 117 /** 118 * Pattern to capture the content of a string after a leading newline. Only ever used on input 119 * that previously matched STARTS_WITH_NEWLINE. 120 */ 121 private static final Pattern LEADING_WHITESPACE_AND_NEWLINE = 122 Pattern.compile("^" + IWS + "\\n(.*)$", Pattern.DOTALL); 123 124 /** 125 * Pattern to capture the content of a string before a trailing newline. Note that this may have 126 * to match text that has already had the final newline removed so we must greedily match the 127 * whitespace rather than the content. 128 */ 129 private static final Pattern TRAILING_WHITESPACE = 130 Pattern.compile("^(.*?)" + IWS + "$", Pattern.DOTALL); 131 132 /** 133 * Flag to tell us if we are in whitespace chomping mode. By default we start in this mode because 134 * the content of the first line in a template is not preceded by a newline (but should behave as 135 * if it was). Once this flag has been set to false, it remains unset until a new line is 136 * encountered. 137 * <p> 138 * Note that we only actually remove whitespace when we find the terminating condition rather than 139 * when as visit the nodes (ie, this mode can be aborted and any visited whitespace will be left 140 * untouched). 141 */ 142 private boolean maybeChompWhitespace = true; 143 144 /** 145 * Flag to tell us if the line we are processing has an inline command in it. 146 * <p> 147 * An inline command is a complex command (eg. 'if', 'loop') where both the start and end of the 148 * command exists on the same line. Non-complex commands (eg. 'var', 'name') cannot be considered 149 * inline. 150 * <p> 151 * This flag is set when we process the start of a complex command and unset when we finish 152 * processing a line. Thus if the flag is still true when we encounter the end of a complex 153 * command, it tells us that (at least one) complex command was entirely contained within the 154 * current line and that we should stop chomping whitespace for the current line. 155 * <p> 156 * This means we can detect input such as: 157 * 158 * <pre> 159 * {@literal <?cs if:x?> <?cs /if?>} 160 * </pre> 161 * for which the trailing newline and surrounding whitespace should not be removed, as opposed to: 162 * 163 * <pre> 164 * {@literal <?cs if:x?> 165 * something 166 * <?cs /if?> 167 * } 168 * </pre> 169 * where the trailing newlines for both the opening and closing of the 'if' command should be 170 * removed. 171 */ 172 private boolean currentLineContainsInlineComplexCommand = false; 173 174 /** 175 * First data command we saw when we started 'chomping' whitespace (note that this can be null if 176 * we are at the beginning of a file or when we have chomped a previous data command down to 177 * nothing). 178 */ 179 private ADataCommand firstChompedData = null; 180 181 /** 182 * Intermediate whitespace-only data commands that we may need to remove. 183 * <p> 184 * This list is built up as we visit commands and is either processed when we need to remove 185 * structural whitespace or cleared if we encounter situations that prohibit whitespace removal. 186 */ 187 private List<ADataCommand> whitespaceData = new ArrayList<ADataCommand>(); 188 189 private static boolean isInlineWhitespace(String text) { 190 return INLINE_WHITESPACE.matcher(text).matches(); 191 } 192 193 private static boolean startsWithNewline(String text) { 194 return STARTS_WITH_NEWLINE.matcher(text).find(); 195 } 196 197 private static boolean endsWithNewline(String text) { 198 return ENDS_WITH_NEWLINE.matcher(text).find(); 199 } 200 201 /** 202 * Removes leading whitespace (including first newline) from the given string. The text must start 203 * with optional whitespace followed by a newline. 204 */ 205 private static String stripLeadingWhitespaceAndNewline(String text) { 206 Matcher matcher = LEADING_WHITESPACE_AND_NEWLINE.matcher(text); 207 if (!matcher.matches()) { 208 throw new IllegalStateException("Text '" + text + "' should have leading whitespace/newline."); 209 } 210 return matcher.group(1); 211 } 212 213 /** 214 * Removes trailing whitespace (if present) from the given string. 215 */ 216 private static String stripTrailingWhitespace(String text) { 217 Matcher matcher = TRAILING_WHITESPACE.matcher(text); 218 if (!matcher.matches()) { 219 // The trailing whitespace regex should never fail to match a string. 220 throw new AssertionError("Error in regular expression"); 221 } 222 return matcher.group(1); 223 } 224 225 /** 226 * Remove whitespace (including first newline) from the start of the given data command (replacing 227 * it with a Noop command if it becomes empty). Returns a modified data command, or null if all 228 * text was removed. 229 * <p> 230 * The given command can be null at the beginning of the file or if the original data command was 231 * entirely consumed by a previous strip operation (remember that data commands can be processed 232 * twice, at both the start and end of a whitespace sequence). 233 */ 234 private static ADataCommand stripLeadingWhitespaceAndNewline(ADataCommand data) { 235 if (data != null) { 236 String text = stripLeadingWhitespaceAndNewline(data.getData().getText()); 237 if (text.isEmpty()) { 238 data.replaceBy(new ANoopCommand()); 239 // Returning null just means we have chomped the whitespace to nothing. 240 data = null; 241 } else { 242 data.setData(new TData(text)); 243 } 244 } 245 return data; 246 } 247 248 /** 249 * Removes whitespace from the end of the given data command (replacing it with a Noop command if 250 * it becomes empty). 251 */ 252 private static void stripTrailingWhitespace(ADataCommand data) { 253 if (data != null) { 254 String text = stripTrailingWhitespace(data.getData().getText()); 255 if (text.isEmpty()) { 256 data.replaceBy(new ANoopCommand()); 257 } else { 258 data.setData(new TData(text)); 259 } 260 } 261 } 262 263 /** 264 * Removes all data commands collected while chomping the current line and clears the given list. 265 */ 266 private static void removeWhitespace(List<ADataCommand> whitespaceData) { 267 for (ADataCommand data : whitespaceData) { 268 data.replaceBy(new ANoopCommand()); 269 } 270 whitespaceData.clear(); 271 } 272 273 @Override 274 public void caseStart(Start node) { 275 // Process the hierarchy. 276 super.caseStart(node); 277 // We might end after processing a non-data node, so deal with any 278 // unprocessed whitespace before we exit. 279 if (maybeChompWhitespace) { 280 stripTrailingWhitespace(firstChompedData); 281 removeWhitespace(whitespaceData); 282 firstChompedData = null; 283 } 284 // Verify we have consumed (and cleared) any object references. 285 if (firstChompedData != null) { 286 throw new IllegalStateException("Unexpected first data node."); 287 } 288 if (!whitespaceData.isEmpty()) { 289 throw new IllegalStateException("Unexpected data nodes."); 290 } 291 } 292 293 @Override 294 public void caseADataCommand(ADataCommand data) { 295 final String originalText = data.getData().getText(); 296 if (maybeChompWhitespace) { 297 if (isInlineWhitespace(originalText)) { 298 // This data command is whitespace between two commands on the same 299 // line, simply chomp it and continue ("Om-nom-nom"). 300 whitespaceData.add(data); 301 return; 302 } 303 if (startsWithNewline(originalText)) { 304 // This data command is at the end of a line that contains only 305 // structural commands and whitespace. We remove all whitespace 306 // associated with this line by: 307 // * Stripping whitespace from the end of the data command at the start 308 // of this line. 309 // * Removing all intermediate (whitespace only) data commands. 310 // * Stripping whitespace from the start of the current data command. 311 stripTrailingWhitespace(firstChompedData); 312 removeWhitespace(whitespaceData); 313 data = stripLeadingWhitespaceAndNewline(data); 314 currentLineContainsInlineComplexCommand = false; 315 } else { 316 // This data command contains some non-whitespace text so we must abort 317 // the chomping of this line and output it normally. 318 abortWhitespaceChompingForCurrentLine(); 319 } 320 } 321 // Test to see if we should start chomping on the next line. 322 maybeChompWhitespace = endsWithNewline(originalText); 323 // Note that data can be null here if we stripped all the whitespace from 324 // it (which means that firstChompedData can be null next time around). 325 firstChompedData = maybeChompWhitespace ? data : null; 326 } 327 328 /** 329 * Helper method to abort whitespace processing for the current line. This method is idempotent on 330 * a per line basis, and once it has been called the state is only reset at the start of the next 331 * line. 332 */ 333 private void abortWhitespaceChompingForCurrentLine() { 334 maybeChompWhitespace = false; 335 currentLineContainsInlineComplexCommand = false; 336 whitespaceData.clear(); 337 } 338 339 // ---- Inline commands that prohibit whitespace removal. ---- 340 341 @Override 342 public void inAAltCommand(AAltCommand node) { 343 abortWhitespaceChompingForCurrentLine(); 344 } 345 346 @Override 347 public void inACallCommand(ACallCommand node) { 348 abortWhitespaceChompingForCurrentLine(); 349 } 350 351 @Override 352 public void inAEvarCommand(AEvarCommand node) { 353 abortWhitespaceChompingForCurrentLine(); 354 } 355 356 @Override 357 public void inALvarCommand(ALvarCommand node) { 358 abortWhitespaceChompingForCurrentLine(); 359 } 360 361 @Override 362 public void inANameCommand(ANameCommand node) { 363 abortWhitespaceChompingForCurrentLine(); 364 } 365 366 @Override 367 public void inASetCommand(ASetCommand node) { 368 abortWhitespaceChompingForCurrentLine(); 369 } 370 371 @Override 372 public void inAUvarCommand(AUvarCommand node) { 373 abortWhitespaceChompingForCurrentLine(); 374 } 375 376 @Override 377 public void inAVarCommand(AVarCommand node) { 378 abortWhitespaceChompingForCurrentLine(); 379 } 380 381 // ---- Two part (open/close) commands that can have child commands. ---- 382 383 public void enterComplexCommand() { 384 currentLineContainsInlineComplexCommand = true; 385 } 386 387 public void exitComplexCommand() { 388 if (currentLineContainsInlineComplexCommand) { 389 abortWhitespaceChompingForCurrentLine(); 390 } 391 } 392 393 @Override 394 public void caseAAltCommand(AAltCommand node) { 395 enterComplexCommand(); 396 super.caseAAltCommand(node); 397 exitComplexCommand(); 398 } 399 400 @Override 401 public void caseADefCommand(ADefCommand node) { 402 enterComplexCommand(); 403 super.caseADefCommand(node); 404 exitComplexCommand(); 405 } 406 407 @Override 408 public void caseAEachCommand(AEachCommand node) { 409 enterComplexCommand(); 410 super.caseAEachCommand(node); 411 exitComplexCommand(); 412 } 413 414 @Override 415 public void caseAEscapeCommand(AEscapeCommand node) { 416 enterComplexCommand(); 417 super.caseAEscapeCommand(node); 418 exitComplexCommand(); 419 } 420 421 @Override 422 public void caseAIfCommand(AIfCommand node) { 423 enterComplexCommand(); 424 super.caseAIfCommand(node); 425 exitComplexCommand(); 426 } 427 428 @Override 429 public void caseALoopCommand(ALoopCommand node) { 430 enterComplexCommand(); 431 super.caseALoopCommand(node); 432 exitComplexCommand(); 433 } 434 435 @Override 436 public void caseALoopIncCommand(ALoopIncCommand node) { 437 enterComplexCommand(); 438 super.caseALoopIncCommand(node); 439 exitComplexCommand(); 440 } 441 442 @Override 443 public void caseALoopToCommand(ALoopToCommand node) { 444 enterComplexCommand(); 445 super.caseALoopToCommand(node); 446 exitComplexCommand(); 447 } 448 449 @Override 450 public void caseAWithCommand(AWithCommand node) { 451 enterComplexCommand(); 452 super.caseAWithCommand(node); 453 exitComplexCommand(); 454 } 455} 456