1/*
2 * Copyright (C) 2010 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.clearsilver.jsilver.syntax;
18
19import com.google.clearsilver.jsilver.syntax.analysis.DepthFirstAdapter;
20import com.google.clearsilver.jsilver.syntax.node.AAddExpression;
21import com.google.clearsilver.jsilver.syntax.node.AEscapeCommand;
22import com.google.clearsilver.jsilver.syntax.node.AFunctionExpression;
23import com.google.clearsilver.jsilver.syntax.node.AMultipleCommand;
24import com.google.clearsilver.jsilver.syntax.node.ANameVariable;
25import com.google.clearsilver.jsilver.syntax.node.AStringExpression;
26import com.google.clearsilver.jsilver.syntax.node.AVarCommand;
27import com.google.clearsilver.jsilver.syntax.node.Node;
28import com.google.clearsilver.jsilver.syntax.node.PCommand;
29import com.google.clearsilver.jsilver.syntax.node.PExpression;
30import com.google.clearsilver.jsilver.syntax.node.PPosition;
31import com.google.clearsilver.jsilver.syntax.node.PVariable;
32import com.google.clearsilver.jsilver.syntax.node.TString;
33
34import java.util.Collection;
35import java.util.LinkedList;
36
37/**
38 * Recursively optimizes the syntax tree with a set of simple operations. This class currently
39 * optimizes:
40 * <ul>
41 * <li>String concatenation in var commands
42 * <li>Function calls to escaping functions
43 * </ul>
44 * <p>
45 * String add expressions in var commands are optimized by replacing something like:
46 *
47 * <pre>
 * &lt;?cs var:a + b ?&gt;
49 * </pre>
50 * with:
51 *
52 * <pre>
 * &lt;?cs var:a ?&gt;&lt;?cs var:b ?&gt;
54 * </pre>
55 *
56 * This avoids having to construct the intermediate result {@code a + b} at runtime and reduces
57 * runtime heap allocations.
58 * <p>
 * Function calls to escaping functions are optimized by replacing them with the equivalent escaping
60 * construct. This is faster because escapers are called with the strings themselves whereas general
61 * function calls require value objects to be created.
62 * <p>
63 * Expressions such as:
64 *
65 * <pre>
 * &lt;?cs var:html_escape(foo) ?&gt;
67 * </pre>
68 * are turned into:
69 *
70 * <pre>
 * &lt;?cs escape:&quot;html&quot; ?&gt;
 * &lt;?cs var:foo ?&gt;
73 * &lt;?cs /escape ?&gt;
74 * </pre>
75 *
76 * It also optimizes sequences of escaped expressions into a single escaped sequence.
77 * <p>
78 * It is important to note that these optimizations cannot be done in isolation if we want to
79 * optimize compound expressions such as:
80 *
81 * <pre>
 * &lt;?cs var:html_escape(foo + bar) + baz ?&gt;
83 * </pre>
84 * which is turned into:
85 *
86 * <pre>
 * &lt;?cs escape:&quot;html&quot; ?&gt;
 * &lt;?cs var:foo ?&gt;
 * &lt;?cs var:bar ?&gt;
90 * &lt;?cs /escape ?&gt;
91 * &lt;?cs var:baz ?&gt;
92 * </pre>
93 *
94 * WARNING: This class isn't strictly just an optimization and its modification of the syntax tree
95 * actually improves JSilver's behavior, bringing it more in line with ClearSilver. Consider the
96 * sequence:
97 *
98 * <pre>
 * &lt;?cs escape:&quot;html&quot; ?&gt;
 * &lt;?cs var:url_escape(foo) ?&gt;
101 * &lt;?cs /escape ?&gt;
102 * </pre>
103 *
104 * In JSilver (without this optimizer being run) this would result in {@code foo} being escaped by
105 * both the html escaper and the url escaping function. However ClearSilver treats top-level escaper
106 * functions specially and {@code foo} is only escaped once by the url escaping function.
107 *
108 * The good news is that this optimization rewrites the above example to:
109 *
110 * <pre>
 * &lt;?cs escape:&quot;html&quot; ?&gt;
 * &lt;?cs escape:&quot;url&quot; ?&gt;
 * &lt;?cs var:foo ?&gt;
114 * &lt;?cs /escape ?&gt;
115 * &lt;?cs /escape ?&gt;
116 * </pre>
117 * which fixes the problem because the new url escaper replaces the existing html escaper (rather
118 * than combining with it).
119 *
120 * The only fly in the ointment here is the {@code url_validate} function which is treated like an
121 * escaper by ClearSilver but which does not (currently) have an escaper associated with it. This
122 * means that:
123 *
124 * <pre>
 * &lt;?cs escape:&quot;html&quot; ?&gt;
 * &lt;?cs var:url_validate(foo) ?&gt;
127 * &lt;?cs /escape ?&gt;
128 * </pre>
129 * will not be rewritten by this class and will result in {@code foo} being escaped twice.
130 *
131 */
132public class VarOptimizer extends DepthFirstAdapter {
133
134  /**
135   * A list of escaper names that are also exposed as escaping functions (eg, if the "foo" escaper
136   * is also exposed as "foo_escape" function then this collection should contain the string "foo").
137   */
138  private final Collection<String> escaperNames;
139
140  public VarOptimizer(Collection<String> escaperNames) {
141    this.escaperNames = escaperNames;
142  }
143
144  @Override
145  public void caseAMultipleCommand(AMultipleCommand multiCommand) {
146    super.caseAMultipleCommand(multiCommand);
147    multiCommand.replaceBy(optimizeEscapeSequences(multiCommand));
148  }
149
150  @Override
151  public void caseAVarCommand(AVarCommand varCommand) {
152    super.caseAVarCommand(varCommand);
153    varCommand.replaceBy(optimizeVarCommands(varCommand));
154  }
155
156  /**
157   * Optimizes a complex var command by recursively expanding its expression into a sequence of
158   * simpler var commands. Currently two expressions are targetted for expansion: string
159   * concatenation and escaping functions.
160   */
161  private PCommand optimizeVarCommands(AVarCommand varCommand) {
162    PExpression expression = varCommand.getExpression();
163    PPosition position = varCommand.getPosition();
164
165    // This test relies on the type optimizer having replaced add commands
166    // with numeric add commands.
167    if (expression instanceof AAddExpression) {
168      // Replace: <?cs var:a + b ?>
169      // with: <?cs var:a ?><?cs var:b ?>
170      AAddExpression addExpression = (AAddExpression) expression;
171      AMultipleCommand multiCommand = new AMultipleCommand();
172      addToContents(multiCommand, optimizedVarCommandOf(position, addExpression.getLeft()));
173      addToContents(multiCommand, optimizedVarCommandOf(position, addExpression.getRight()));
174      return optimizeEscapeSequences(multiCommand);
175    }
176
177    // This test relies on the sequence optimizer removing single element
178    // sequence commands.
179    if (expression instanceof AFunctionExpression) {
180      // Replace: <?cs var:foo_escape(x) ?>
181      // with: <?cs escape:"foo" ?><?cs var:x ?><?cs /escape ?>
182      AFunctionExpression functionExpression = (AFunctionExpression) expression;
183      String name = escapeNameOf(functionExpression);
184      if (escaperNames.contains(name)) {
185        LinkedList<PExpression> args = functionExpression.getArgs();
186        if (args.size() == 1) {
187          return new AEscapeCommand(position, quotedStringExpressionOf(name),
188              optimizedVarCommandOf(position, args.getFirst()));
189        }
190      }
191    }
192    return varCommand;
193  }
194
195  /**
196   * Create a var command from the given expression and recursively optimize it, returning the
197   * result.
198   */
199  private PCommand optimizedVarCommandOf(PPosition position, PExpression expression) {
200    return optimizeVarCommands(new AVarCommand(cloneOf(position), cloneOf(expression)));
201  }
202
203  /** Simple helper to clone nodes in a typesafe way */
204  @SuppressWarnings("unchecked")
205  private static <T extends Node> T cloneOf(T t) {
206    return (T) t.clone();
207  }
208
209  /**
210   * Helper to efficiently add commands to a multiple command (if the command to be added is a
211   * multiple command, we add its contents). This is used to implement a tail recursion optimization
212   * to flatten multiple commands.
213   */
214  private static void addToContents(AMultipleCommand multi, PCommand command) {
215    if (command instanceof AMultipleCommand) {
216      multi.getCommand().addAll(((AMultipleCommand) command).getCommand());
217    } else {
218      multi.getCommand().add(command);
219    }
220  }
221
222  /** When used as functions, escapers have the name 'foo_escape' */
223  private static final String ESCAPE_SUFFIX = "_escape";
224
225  /**
226   * Returns the name of the escaper which could replace this function (or null if this function
227   * cannot be replaced).
228   */
229  private static String escapeNameOf(AFunctionExpression function) {
230    PVariable nvar = function.getName();
231    if (!(nvar instanceof ANameVariable)) {
232      // We are not interested in dynamic function calls (such as "a.b(x)")
233      return null;
234    }
235    String name = ((ANameVariable) nvar).getWord().getText();
236    if (!name.endsWith(ESCAPE_SUFFIX)) {
237      return null;
238    }
239    return name.substring(0, name.length() - ESCAPE_SUFFIX.length());
240  }
241
242  /**
243   * Returns a quoted string expression of the given text.
244   * <p>
245   * This is used because when an escaper is called as a function we need to replace:
246   *
247   * <pre>
248   * &lt;cs? var:foo_escape(bar) ?&gt;
249   * </pre>
250   * with:
251   *
252   * <pre>
253   * &lt;cs? escape:&quot;foo&quot; ?&gt;&lt;cs? var:bar ?&gt;&lt;?cs /escape ?&gt;
254   * </pre>
255   * Using the quoted escaper name.
256   */
257  private static AStringExpression quotedStringExpressionOf(String text) {
258    assert text.indexOf('"') == -1;
259    return new AStringExpression(new TString('"' + text + '"'));
260  }
261
262  /**
263   * Returns a new command containing the contents of the given multiple command but with with
264   * multiple successive (matching) escape commands folded into one.
265   */
266  private static PCommand optimizeEscapeSequences(AMultipleCommand multiCommand) {
267    AEscapeCommand lastEscapeCommand = null;
268    LinkedList<PCommand> commands = new LinkedList<PCommand>();
269    for (PCommand command : multiCommand.getCommand()) {
270      AEscapeCommand escapeCommand = asSimpleEscapeCommand(command);
271      if (isSameEscaper(escapeCommand, lastEscapeCommand)) {
272        addToContents(contentsOf(lastEscapeCommand), escapeCommand.getCommand());
273      } else {
274        // Add the original command and set the escaper (possibly null)
275        commands.add(command);
276        lastEscapeCommand = escapeCommand;
277      }
278    }
279    assert !commands.isEmpty();
280    return (commands.size() > 1) ? new AMultipleCommand(commands) : commands.getFirst();
281  }
282
283  /**
284   * Returns the escaped command associated with the given escape function as a multiple command. If
285   * the command was already a multiple command, it is returned, otherwise a new multiple command is
286   * created to wrap the original escaped command. This helper facilitates merging multiple
287   * sequences of escapers.
288   */
289  private static AMultipleCommand contentsOf(AEscapeCommand escapeCommand) {
290    PCommand escapedCommand = escapeCommand.getCommand();
291    if (escapedCommand instanceof AMultipleCommand) {
292      return (AMultipleCommand) escapedCommand;
293    }
294    AMultipleCommand multiCommand = new AMultipleCommand();
295    multiCommand.getCommand().add(escapedCommand);
296    escapeCommand.setCommand(multiCommand);
297    return multiCommand;
298  }
299
300  /**
301   * Returns the given command only if it is an escape command with a simple, string literal, name;
302   * otherwise returns {@code null}.
303   */
304  private static AEscapeCommand asSimpleEscapeCommand(PCommand command) {
305    if (!(command instanceof AEscapeCommand)) {
306      return null;
307    }
308    AEscapeCommand escapeCommand = (AEscapeCommand) command;
309    if (!(escapeCommand.getExpression() instanceof AStringExpression)) {
310      return null;
311    }
312    return escapeCommand;
313  }
314
315  /**
316   * Compares two simple escape commands and returns true if they perform the same escaping
317   * function.
318   */
319  private static boolean isSameEscaper(AEscapeCommand newCommand, AEscapeCommand oldCommand) {
320    if (newCommand == null || oldCommand == null) {
321      return false;
322    }
323    return simpleNameOf(newCommand).equals(simpleNameOf(oldCommand));
324  }
325
326  /**
327   * Returns the name of the given simple escape command (as returned by
328   * {@link #asSimpleEscapeCommand(PCommand)}).
329   */
330  private static String simpleNameOf(AEscapeCommand escapeCommand) {
331    return ((AStringExpression) escapeCommand.getExpression()).getValue().getText();
332  }
333}
334