1/* 2 * Copyright (C) 2008 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.google.common.escape; 18 19import com.google.common.annotations.Beta; 20import com.google.common.annotations.GwtCompatible; 21import com.google.common.base.Function; 22 23/** 24 * An object that converts literal text into a format safe for inclusion in a particular context 25 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the 26 * text is performed automatically by the relevant parser. 27 * 28 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code 29 * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the 30 * resulting XML document is parsed, the parser API will return this text as the original literal 31 * string {@code "Foo<Bar>"}. 32 * 33 * <p>An {@code Escaper} instance is required to be stateless, and safe when used concurrently by 34 * multiple threads. 35 * 36 * <p>Because, in general, escaping operates on the code points of a string and not on its 37 * individual {@code char} values, it is not safe to assume that {@code escape(s)} is equivalent to 38 * {@code escape(s.substring(0, n)) + escape(s.substing(n))} for arbitrary {@code n}. This is 39 * because of the possibility of splitting a surrogate pair. The only case in which it is safe to 40 * escape strings and concatenate the results is if you can rule out this possibility, either by 41 * splitting an existing long string into short strings adaptively around {@linkplain 42 * Character#isHighSurrogate surrogate} {@linkplain Character#isLowSurrogate pairs}, or by starting 43 * with short strings already known to be free of unpaired surrogates. 44 * 45 * <p>The two primary implementations of this interface are {@link CharEscaper} and {@link 46 * UnicodeEscaper}. They are heavily optimized for performance and greatly simplify the task of 47 * implementing new escapers. It is strongly recommended that when implementing a new escaper you 48 * extend one of these classes. If you find that you are unable to achieve the desired behavior 49 * using either of these classes, please contact the Java libraries team for advice. 50 * 51 * <p>Several popular escapers are defined as constants in classes like {@link 52 * com.google.common.html.HtmlEscapers}, {@link com.google.common.xml.XmlEscapers}, and {@link 53 * SourceCodeEscapers}. To create your own escapers, use {@link CharEscaperBuilder}, or extend 54 * {@code CharEscaper} or {@code UnicodeEscaper}. 55 * 56 * @author David Beaumont 57 * @since 15.0 58 */ 59@Beta 60@GwtCompatible 61public abstract class Escaper { 62 // TODO(user): evaluate custom implementations, considering package private constructor. 63 /** Constructor for use by subclasses. */ 64 protected Escaper() {} 65 66 /** 67 * Returns the escaped form of a given literal string. 68 * 69 * <p>Note that this method may treat input characters differently depending on the specific 70 * escaper implementation. 71 * 72 * <ul> 73 * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a> 74 * correctly, including surrogate character pairs. If the input is badly formed the escaper 75 * should throw {@link IllegalArgumentException}. 76 * <li>{@link CharEscaper} handles Java characters independently and does not verify the input for 77 * well formed characters. A {@code CharEscaper} should not be used in situations where input 78 * is not guaranteed to be restricted to the Basic Multilingual Plane (BMP). 79 * </ul> 80 * 81 * @param string the literal string to be escaped 82 * @return the escaped form of {@code string} 83 * @throws NullPointerException if {@code string} is null 84 * @throws IllegalArgumentException if {@code string} contains badly formed UTF-16 or cannot be 85 * escaped for any other reason 86 */ 87 public abstract String escape(String string); 88 89 private final Function<String, String> asFunction = 90 new Function<String, String>() { 91 @Override 92 public String apply(String from) { 93 return escape(from); 94 } 95 }; 96 97 /** 98 * Returns a {@link Function} that invokes {@link #escape(String)} on this escaper. 99 */ 100 public final Function<String, String> asFunction() { 101 return asFunction; 102 } 103} 104