/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.clearsilver.jsilver.syntax;

import com.google.clearsilver.jsilver.syntax.analysis.DepthFirstAdapter;
import com.google.clearsilver.jsilver.syntax.node.AAddExpression;
import com.google.clearsilver.jsilver.syntax.node.AEscapeCommand;
import com.google.clearsilver.jsilver.syntax.node.AFunctionExpression;
import com.google.clearsilver.jsilver.syntax.node.AMultipleCommand;
import com.google.clearsilver.jsilver.syntax.node.ANameVariable;
import com.google.clearsilver.jsilver.syntax.node.AStringExpression;
import com.google.clearsilver.jsilver.syntax.node.AVarCommand;
import com.google.clearsilver.jsilver.syntax.node.Node;
import com.google.clearsilver.jsilver.syntax.node.PCommand;
import com.google.clearsilver.jsilver.syntax.node.PExpression;
import com.google.clearsilver.jsilver.syntax.node.PPosition;
import com.google.clearsilver.jsilver.syntax.node.PVariable;
import com.google.clearsilver.jsilver.syntax.node.TString;

import java.util.Collection;
import java.util.LinkedList;

/**
 * Recursively optimizes the syntax tree with a set of simple operations. This class currently
 * optimizes:
 * <ul>
 * <li>String concatenation in var commands
 * <li>Function calls to escaping functions
 * </ul>
 * <p>
 * String add expressions in var commands are optimized by replacing something like:
 *
 * <pre>{@code
 * <?cs var:a + b ?>
 * }</pre>
 * with:
 *
 * <pre>{@code
 * <?cs var:a ?><?cs var:b ?>
 * }</pre>
 *
 * This avoids having to construct the intermediate result {@code a + b} at runtime and reduces
 * runtime heap allocations.
 * <p>
 * Function calls to escaping functions are optimized by replacing them with the equivalent
 * escaping construct. This is faster because escapers are called with the strings themselves
 * whereas general function calls require value objects to be created.
 * <p>
 * Expressions such as:
 *
 * <pre>{@code
 * <?cs var:html_escape(foo) ?>
 * }</pre>
 * are turned into:
 *
 * <pre>{@code
 * <?cs escape:"html" ?>
 * <?cs var:foo ?>
 * <?cs /escape ?>
 * }</pre>
 *
 * It also optimizes sequences of escaped expressions into a single escaped sequence.
 * <p>
 * It is important to note that these optimizations cannot be done in isolation if we want to
 * optimize compound expressions such as:
 *
 * <pre>{@code
 * <?cs var:html_escape(foo + bar) + baz ?>
 * }</pre>
 * which is turned into:
 *
 * <pre>{@code
 * <?cs escape:"html" ?>
 * <?cs var:foo ?>
 * <?cs var:bar ?>
 * <?cs /escape ?>
 * <?cs var:baz ?>
 * }</pre>
 *
 * WARNING: This class isn't strictly just an optimization and its modification of the syntax tree
 * actually improves JSilver's behavior, bringing it more in line with ClearSilver. Consider the
 * sequence:
 *
 * <pre>{@code
 * <?cs escape:"html" ?>
 * <?cs var:url_escape(foo) ?>
 * <?cs /escape ?>
 * }</pre>
 *
 * In JSilver (without this optimizer being run) this would result in {@code foo} being escaped by
 * both the html escaper and the url escaping function. However ClearSilver treats top-level
 * escaper functions specially and {@code foo} is only escaped once by the url escaping function.
 * <p>
 * The good news is that this optimization rewrites the above example to:
 *
 * <pre>{@code
 * <?cs escape:"html" ?>
 * <?cs escape:"url" ?>
 * <?cs var:foo ?>
 * <?cs /escape ?>
 * <?cs /escape ?>
 * }</pre>
 * which fixes the problem because the new url escaper replaces the existing html escaper (rather
 * than combining with it).
 * <p>
 * The only fly in the ointment here is the {@code url_validate} function which is treated like an
 * escaper by ClearSilver but which does not (currently) have an escaper associated with it. This
 * means that:
 *
 * <pre>{@code
 * <?cs escape:"html" ?>
 * <?cs var:url_validate(foo) ?>
 * <?cs /escape ?>
 * }</pre>
 * will not be rewritten by this class and will result in {@code foo} being escaped twice.
 */
public class VarOptimizer extends DepthFirstAdapter {

  /** When used as functions, escapers have the name 'foo_escape'. */
  private static final String ESCAPE_SUFFIX = "_escape";

  /**
   * A list of escaper names that are also exposed as escaping functions (eg, if the "foo" escaper
   * is also exposed as "foo_escape" function then this collection should contain the string
   * "foo").
   */
  private final Collection<String> escaperNames;

  /**
   * Creates an optimizer that rewrites calls to the given escaping functions.
   *
   * @param escaperNames names of the escapers (without the {@code _escape} suffix) whose function
   *        form may be replaced by an escape command. The collection is stored as given, not
   *        copied.
   */
  public VarOptimizer(Collection<String> escaperNames) {
    this.escaperNames = escaperNames;
  }

  /**
   * Visits the children of the multiple command first, then replaces the command with a version in
   * which successive matching escape commands have been folded together.
   */
  @Override
  public void caseAMultipleCommand(AMultipleCommand multiCommand) {
    super.caseAMultipleCommand(multiCommand);
    if (multiCommand.getCommand().isEmpty()) {
      // Nothing to fold; optimizeEscapeSequences() requires at least one command.
      return;
    }
    multiCommand.replaceBy(optimizeEscapeSequences(multiCommand));
  }

  /**
   * Visits the children of the var command first, then replaces the command with its optimized
   * expansion (if any).
   */
  @Override
  public void caseAVarCommand(AVarCommand varCommand) {
    super.caseAVarCommand(varCommand);
    PCommand optimized = optimizeVarCommands(varCommand);
    // optimizeVarCommands() hands back the very same node when it has nothing to rewrite.
    // Replacing a node with itself is redundant at best, so only touch the tree on a real change.
    if (optimized != varCommand) {
      varCommand.replaceBy(optimized);
    }
  }

  /**
   * Optimizes a complex var command by recursively expanding its expression into a sequence of
   * simpler var commands. Currently two expressions are targeted for expansion: string
   * concatenation and escaping functions.
   *
   * @param varCommand the var command to optimize
   * @return the replacement command, or {@code varCommand} itself if no rewrite applies
   */
  private PCommand optimizeVarCommands(AVarCommand varCommand) {
    PExpression expression = varCommand.getExpression();
    PPosition position = varCommand.getPosition();

    // This test relies on the type optimizer having replaced add commands
    // with numeric add commands.
    if (expression instanceof AAddExpression) {
      // Replace: <?cs var:a + b ?>
      // with: <?cs var:a ?><?cs var:b ?>
      AAddExpression addExpression = (AAddExpression) expression;
      AMultipleCommand multiCommand = new AMultipleCommand();
      addToContents(multiCommand, optimizedVarCommandOf(position, addExpression.getLeft()));
      addToContents(multiCommand, optimizedVarCommandOf(position, addExpression.getRight()));
      return optimizeEscapeSequences(multiCommand);
    }

    // This test relies on the sequence optimizer removing single element
    // sequence commands.
    if (expression instanceof AFunctionExpression) {
      // Replace: <?cs var:foo_escape(x) ?>
      // with: <?cs escape:"foo" ?><?cs var:x ?><?cs /escape ?>
      AFunctionExpression functionExpression = (AFunctionExpression) expression;
      String name = escapeNameOf(functionExpression);
      // escapeNameOf() returns null for anything that is not a plain "foo_escape" call. Check for
      // that explicitly: Collection.contains(null) is allowed to throw NullPointerException.
      if (name != null && escaperNames.contains(name)) {
        LinkedList<PExpression> args = functionExpression.getArgs();
        if (args.size() == 1) {
          return new AEscapeCommand(position, quotedStringExpressionOf(name),
              optimizedVarCommandOf(position, args.getFirst()));
        }
      }
    }
    return varCommand;
  }

  /**
   * Create a var command from the given expression and recursively optimize it, returning the
   * result. Both the position and the expression are cloned before use.
   */
  private PCommand optimizedVarCommandOf(PPosition position, PExpression expression) {
    return optimizeVarCommands(new AVarCommand(cloneOf(position), cloneOf(expression)));
  }

  /** Simple helper to clone nodes in a typesafe way. */
  @SuppressWarnings("unchecked")
  private static <T extends Node> T cloneOf(T t) {
    return (T) t.clone();
  }

  /**
   * Helper to efficiently add commands to a multiple command (if the command to be added is a
   * multiple command, we add its contents). This is used to implement a tail recursion
   * optimization to flatten multiple commands.
   */
  private static void addToContents(AMultipleCommand multi, PCommand command) {
    if (command instanceof AMultipleCommand) {
      multi.getCommand().addAll(((AMultipleCommand) command).getCommand());
    } else {
      multi.getCommand().add(command);
    }
  }

  /**
   * Returns the name of the escaper which could replace this function (or null if this function
   * cannot be replaced).
   */
  private static String escapeNameOf(AFunctionExpression function) {
    PVariable nvar = function.getName();
    if (!(nvar instanceof ANameVariable)) {
      // We are not interested in dynamic function calls (such as "a.b(x)")
      return null;
    }
    String name = ((ANameVariable) nvar).getWord().getText();
    if (!name.endsWith(ESCAPE_SUFFIX)) {
      return null;
    }
    return name.substring(0, name.length() - ESCAPE_SUFFIX.length());
  }

  /**
   * Returns a quoted string expression of the given text.
   * <p>
   * This is used because when an escaper is called as a function we need to replace:
   *
   * <pre>{@code
   * <?cs var:foo_escape(bar) ?>
   * }</pre>
   * with:
   *
   * <pre>{@code
   * <?cs escape:"foo" ?><?cs var:bar ?><?cs /escape ?>
   * }</pre>
   * Using the quoted escaper name.
   */
  private static AStringExpression quotedStringExpressionOf(String text) {
    assert text.indexOf('"') == -1;
    return new AStringExpression(new TString('"' + text + '"'));
  }

  /**
   * Returns a new command containing the contents of the given multiple command but with
   * multiple successive (matching) escape commands folded into one.
   *
   * @param multiCommand a multiple command containing at least one command
   * @return a new multiple command, or the sole remaining command if only one is left
   */
  private static PCommand optimizeEscapeSequences(AMultipleCommand multiCommand) {
    AEscapeCommand lastEscapeCommand = null;
    LinkedList<PCommand> commands = new LinkedList<PCommand>();
    for (PCommand command : multiCommand.getCommand()) {
      AEscapeCommand escapeCommand = asSimpleEscapeCommand(command);
      if (isSameEscaper(escapeCommand, lastEscapeCommand)) {
        // Same escaper as the previous command: move the escaped contents into the previous
        // escape command instead of keeping a second one.
        addToContents(contentsOf(lastEscapeCommand), escapeCommand.getCommand());
      } else {
        // Add the original command and set the escaper (possibly null)
        commands.add(command);
        lastEscapeCommand = escapeCommand;
      }
    }
    assert !commands.isEmpty();
    return (commands.size() > 1) ? new AMultipleCommand(commands) : commands.getFirst();
  }

  /**
   * Returns the escaped command associated with the given escape function as a multiple command.
   * If the command was already a multiple command, it is returned, otherwise a new multiple
   * command is created to wrap the original escaped command. This helper facilitates merging
   * multiple sequences of escapers.
   */
  private static AMultipleCommand contentsOf(AEscapeCommand escapeCommand) {
    PCommand escapedCommand = escapeCommand.getCommand();
    if (escapedCommand instanceof AMultipleCommand) {
      return (AMultipleCommand) escapedCommand;
    }
    AMultipleCommand multiCommand = new AMultipleCommand();
    multiCommand.getCommand().add(escapedCommand);
    escapeCommand.setCommand(multiCommand);
    return multiCommand;
  }

  /**
   * Returns the given command only if it is an escape command with a simple, string literal, name;
   * otherwise returns {@code null}.
   */
  private static AEscapeCommand asSimpleEscapeCommand(PCommand command) {
    if (!(command instanceof AEscapeCommand)) {
      return null;
    }
    AEscapeCommand escapeCommand = (AEscapeCommand) command;
    if (!(escapeCommand.getExpression() instanceof AStringExpression)) {
      return null;
    }
    return escapeCommand;
  }

  /**
   * Compares two simple escape commands and returns true if they perform the same escaping
   * function. Returns false if either command is {@code null}.
   */
  private static boolean isSameEscaper(AEscapeCommand newCommand, AEscapeCommand oldCommand) {
    if (newCommand == null || oldCommand == null) {
      return false;
    }
    return simpleNameOf(newCommand).equals(simpleNameOf(oldCommand));
  }

  /**
   * Returns the name of the given simple escape command (as returned by
   * {@link #asSimpleEscapeCommand(PCommand)}).
   */
  private static String simpleNameOf(AEscapeCommand escapeCommand) {
    return ((AStringExpression) escapeCommand.getExpression()).getValue().getText();
  }
}