/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
1756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpackage com.google.clearsilver.jsilver.functions.escape;
1856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
1956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonimport com.google.clearsilver.jsilver.functions.TextFilter;
2056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
2156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonimport java.io.IOException;
2256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
2356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson/**
2456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Base class to make writing fast, simple escaping functions easy. A simple escaping function is
2556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * one where each character in the input is treated independently and there is no runtime state. The
2656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * only decision you make is whether the current character should be escaped into some different
2756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * string or not.
2856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson *
2956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * The only serious limitation on using this class it that only low valued characters can be
3056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * escaped. This is because (for speed) we use an array of escaped strings, indexed by character
3156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * value. In future this limitation may be lifted if there's a call for it.
3256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */
3356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpublic abstract class SimpleEscapingFunction implements TextFilter {
3456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  // The limit for how many strings we can store here (max)
3556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  private static final int CHAR_INDEX_LIMIT = 256;
3656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
3756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  // Our fast lookup array of escaped strings. This array is indexed by char
3856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  // value so it's important not to have it grow too large. For now we have
3956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  // an artificial limit on it.
4056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  private String[] ESCAPE_STRINGS;
4156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
4256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  /**
4356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * Creates an instance to escape the given set of characters.
4456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   */
4556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  protected SimpleEscapingFunction(char[] ESCAPE_CHARS) {
4656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    setEscapeChars(ESCAPE_CHARS);
4756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  }
4856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
4956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  protected SimpleEscapingFunction() {
5056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    ESCAPE_STRINGS = new String[0];
5156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  }
5256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
5356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  protected void setEscapeChars(char[] ESCAPE_CHARS) throws AssertionError {
5456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    int highestChar = -1;
5556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    for (char c : ESCAPE_CHARS) {
5656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      if (c > highestChar) {
5756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        highestChar = c;
5856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      }
5956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
6056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    if (highestChar >= CHAR_INDEX_LIMIT) {
6156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      throw new AssertionError("Cannot escape characters with values above " + CHAR_INDEX_LIMIT);
6256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
6356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    ESCAPE_STRINGS = new String[highestChar + 1];
6456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    for (char c : ESCAPE_CHARS) {
6556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      ESCAPE_STRINGS[c] = getEscapeString(c);
6656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
6756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  }
6856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
6956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  /**
7056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * Given one of the escape characters supplied to this instance's constructor, return the escape
7156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * string for it. This method does not need to be efficient.
7256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   */
7356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  protected abstract String getEscapeString(char c);
7456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
7556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  /**
7656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * Algorithm is as follows:
7756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * <ol>
7856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * <li>Scan block for contiguous unescaped sequences
7956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * <li>Append unescaped sequences to output
8056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * <li>Append escaped string to output (if found)
8156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * <li>Rinse &amp; Repeat
8256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * </ol>
8356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   */
8456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  @Override
8556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  public void filter(String in, Appendable out) throws IOException {
8656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    final int len = in.length();
8756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    int pos = 0;
8856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    int start = pos;
8956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    while (pos < len) {
9056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      // We really hope that the hotspot compiler inlines this call properly
9156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      // (without optimization it accounts for > 50% of the time in this call)
9256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      final char chr = in.charAt(pos);
9356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      final String escapeString;
9456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      if (chr < ESCAPE_STRINGS.length && (escapeString = ESCAPE_STRINGS[chr]) != null) {
9556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        // We really hope our appendable handles sub-strings nicely
9656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        // (we know that StringBuilder / StringBuffer does).
9756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        if (pos > start) {
9856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson          out.append(in, start, pos);
9956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        }
10056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        out.append(escapeString);
10156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        pos += 1;
10256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        start = pos;
10356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        continue;
10456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      }
10556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      pos += 1;
10656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
10756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    if (pos > start) {
10856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      out.append(in, start, pos);
10956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
11056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  }
11156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson}
112