1/*
2 * Copyright (C) 2010 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.clearsilver.jsilver.functions.escape;
18
19
20/**
21 * This class HTML escapes a string in the same way as the ClearSilver html_escape function.
22 *
23 * This implementation has been optimized for performance.
24 *
25 */
26public class HtmlEscapeFunction extends SimpleEscapingFunction {
27
28  // The escape chars
29  private static final char[] ESCAPE_CHARS = {'<', '>', '&', '\'', '"'};
30
31  // UNQUOTED_ESCAPE_CHARS = ESCAPE_CHARS + UNQUOTED_EXTRA_CHARS + chars < 0x20 + 0x7f
32  private static final char[] UNQUOTED_ESCAPE_CHARS;
33
34  private static final char[] UNQUOTED_EXTRA_CHARS = {'=', ' '};
35
36  // The corresponding escape strings for all ascii characters.
37  // With control characters, we simply strip them out if necessary.
38  private static String[] ESCAPE_CODES =
39      {"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
40          "", "", "", "", "", "", "", "", "", "", "!", "&quot;", "#", "$", "%", "&amp;", "&#39;",
41          "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
42          ":", ";", "&lt;", "&#61;", "&gt;", "?", "@", "A", "B", "C", "D", "E", "F", "G", "H", "I",
43          "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[",
44          "\\", "]", "^", "_", "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
45          "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~",
46          ""};
47
48  static {
49    UNQUOTED_ESCAPE_CHARS = new char[33 + ESCAPE_CHARS.length + UNQUOTED_EXTRA_CHARS.length];
50    // In unquoted HTML attributes, strip out control characters also, as they could
51    // get interpreted as end of attribute, just like spaces.
52    for (int n = 0; n <= 0x1f; n++) {
53      UNQUOTED_ESCAPE_CHARS[n] = (char) n;
54    }
55    UNQUOTED_ESCAPE_CHARS[32] = (char) 0x7f;
56    System.arraycopy(ESCAPE_CHARS, 0, UNQUOTED_ESCAPE_CHARS, 33, ESCAPE_CHARS.length);
57    System.arraycopy(UNQUOTED_EXTRA_CHARS, 0, UNQUOTED_ESCAPE_CHARS, 33 + ESCAPE_CHARS.length,
58        UNQUOTED_EXTRA_CHARS.length);
59
60  }
61
62  /**
63   * isUnquoted should be true if the function is escaping a string that will appear inside an
64   * unquoted HTML attribute.
65   *
66   * If the string is unquoted, we strip out all characters 0 - 0x1f and 0x7f for security reasons.
67   */
68  public HtmlEscapeFunction(boolean isUnquoted) {
69    if (isUnquoted) {
70      super.setEscapeChars(UNQUOTED_ESCAPE_CHARS);
71    } else {
72      super.setEscapeChars(ESCAPE_CHARS);
73    }
74  }
75
76  @Override
77  protected String getEscapeString(char c) {
78    if (c < 0x80) {
79      return ESCAPE_CODES[c];
80    }
81    throw new IllegalArgumentException("Unexpected escape character " + c + "[" + (int) c + "]");
82  }
83}
84