156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson/* 256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Copyright (C) 2010 Google Inc. 356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Licensed under the Apache License, Version 2.0 (the "License"); 556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * you may not use this file except in compliance with the License. 656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * You may obtain a copy of the License at 756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * http://www.apache.org/licenses/LICENSE-2.0 956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 1056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Unless required by applicable law or agreed to in writing, software 1156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * distributed under the License is distributed on an "AS IS" BASIS, 1256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * See the License for the specific language governing permissions and 1456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * limitations under the License. 1556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 1656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 1756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpackage com.google.clearsilver.jsilver.functions.escape; 1856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 1956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonimport com.google.clearsilver.jsilver.functions.TextFilter; 2056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 2156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonimport java.io.IOException; 2256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 2356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson/** 2456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Base class to make writing fast, simple escaping functions easy. A simple escaping function is 2556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * one where each character in the input is treated independently and there is no runtime state. The 2656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * only decision you make is whether the current character should be escaped into some different 2756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * string or not. 2856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * 2956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * The only serious limitation on using this class it that only low valued characters can be 3056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * escaped. This is because (for speed) we use an array of escaped strings, indexed by character 3156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * value. In future this limitation may be lifted if there's a call for it. 3256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 3356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpublic abstract class SimpleEscapingFunction implements TextFilter { 3456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // The limit for how many strings we can store here (max) 3556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson private static final int CHAR_INDEX_LIMIT = 256; 3656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 3756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // Our fast lookup array of escaped strings. This array is indexed by char 3856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // value so it's important not to have it grow too large. For now we have 3956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // an artificial limit on it. 4056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson private String[] ESCAPE_STRINGS; 4156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 4256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 4356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Creates an instance to escape the given set of characters. 4456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 4556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson protected SimpleEscapingFunction(char[] ESCAPE_CHARS) { 4656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson setEscapeChars(ESCAPE_CHARS); 4756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 4856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 4956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson protected SimpleEscapingFunction() { 5056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson ESCAPE_STRINGS = new String[0]; 5156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 5256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 5356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson protected void setEscapeChars(char[] ESCAPE_CHARS) throws AssertionError { 5456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson int highestChar = -1; 5556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson for (char c : ESCAPE_CHARS) { 5656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson if (c > highestChar) { 5756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson highestChar = c; 5856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 5956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 6056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson if (highestChar >= CHAR_INDEX_LIMIT) { 6156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson throw new AssertionError("Cannot escape characters with values above " + CHAR_INDEX_LIMIT); 6256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 6356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson ESCAPE_STRINGS = new String[highestChar + 1]; 6456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson for (char c : ESCAPE_CHARS) { 6556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson ESCAPE_STRINGS[c] = getEscapeString(c); 6656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 6756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 6856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 6956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 7056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Given one of the escape characters supplied to this instance's constructor, return the escape 7156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * string for it. This method does not need to be efficient. 7256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 7356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson protected abstract String getEscapeString(char c); 7456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson 7556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson /** 7656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Algorithm is as follows: 7756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <ol> 7856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <li>Scan block for contiguous unescaped sequences 7956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <li>Append unescaped sequences to output 8056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <li>Append escaped string to output (if found) 8156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <li>Rinse & Repeat 8256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * </ol> 8356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */ 8456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson @Override 8556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson public void filter(String in, Appendable out) throws IOException { 8656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson final int len = in.length(); 8756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson int pos = 0; 8856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson int start = pos; 8956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson while (pos < len) { 9056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // We really hope that the hotspot compiler inlines this call properly 9156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // (without optimization it accounts for > 50% of the time in this call) 9256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson final char chr = in.charAt(pos); 9356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson final String escapeString; 9456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson if (chr < ESCAPE_STRINGS.length && (escapeString = ESCAPE_STRINGS[chr]) != null) { 9556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // We really hope our appendable handles sub-strings nicely 9656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson // (we know that StringBuilder / StringBuffer does). 9756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson if (pos > start) { 9856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson out.append(in, start, pos); 9956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 10056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson out.append(escapeString); 10156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson pos += 1; 10256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson start = pos; 10356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson continue; 10456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 10556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson pos += 1; 10656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 10756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson if (pos > start) { 10856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson out.append(in, start, pos); 10956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 11056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson } 11156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson} 112