1/**
2 * Copyright (c) 2006, Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.android.mail.common.base;
18
19import static com.google.android.mail.common.base.Preconditions.checkNotNull;
20
21import java.io.IOException;
22
23/**
24 * An object that converts literal text into a format safe for inclusion in a particular context
25 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
26 * text is performed automatically by the relevant parser.
27 *
28 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
29 * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
30 * resulting XML document is parsed, the parser API will return this text as the original literal
31 * string {@code "Foo<Bar>"}.
32 *
33 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by
34 * multiple threads.
35 *
36 * <p>Several popular escapers are defined as constants in the class {@link CharEscapers}. To create
37 * your own escapers, use {@link CharEscaperBuilder}, or extend this class and implement the {@link
38 * #escape(char)} method.
39 *
40 * @author sven@google.com (Sven Mawson)
41 */
42public abstract class CharEscaper extends Escaper {
43  /**
44   * Returns the escaped form of a given literal string.
45   *
46   * @param string the literal string to be escaped
47   * @return the escaped form of {@code string}
48   * @throws NullPointerException if {@code string} is null
49   */
50  @Override public String escape(String string) {
51    checkNotNull(string);
52    // Inlineable fast-path loop which hands off to escapeSlow() only if needed
53    int length = string.length();
54    for (int index = 0; index < length; index++) {
55      if (escape(string.charAt(index)) != null) {
56        return escapeSlow(string, index);
57      }
58    }
59    return string;
60  }
61
62  /**
63   * Returns an {@code Appendable} instance which automatically escapes all text appended to it
64   * before passing the resulting text to an underlying {@code Appendable}.
65   *
66   * <p>The methods of the returned object will propagate any exceptions thrown by the underlying
67   * {@code Appendable}, and will throw {@link NullPointerException} if asked to append {@code
68   * null}, but do not otherwise throw any exceptions.
69   *
70   * <p>The escaping behavior is identical to that of {@link #escape(String)}, so the following code
71   * is always equivalent to {@code escaper.escape(string)}: <pre>   {@code
72   *
73   *   StringBuilder sb = new StringBuilder();
74   *   escaper.escape(sb).append(string);
75   *   return sb.toString();}</pre>
76   *
77   * @param out the underlying {@code Appendable} to append escaped output to
78   * @return an {@code Appendable} which passes text to {@code out} after escaping it
79   * @throws NullPointerException if {@code out} is null.
80   */
81  @Override public Appendable escape(final Appendable out) {
82    checkNotNull(out);
83
84    return new Appendable() {
85      @Override public Appendable append(CharSequence csq) throws IOException {
86        out.append(escape(csq.toString()));
87        return this;
88      }
89
90      @Override public Appendable append(CharSequence csq, int start, int end) throws IOException {
91        out.append(escape(csq.subSequence(start, end).toString()));
92        return this;
93      }
94
95      @Override public Appendable append(char c) throws IOException {
96        char[] escaped = escape(c);
97        if (escaped == null) {
98          out.append(c);
99        } else {
100          for (char e : escaped) {
101            out.append(e);
102          }
103        }
104        return this;
105      }
106    };
107  }
108
109  /**
110   * Returns the escaped form of a given literal string, starting at the given index. This method is
111   * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
112   * protected to allow subclasses to override the fastpath escaping function to inline their
113   * escaping test. See {@link CharEscaperBuilder} for an example usage.
114   *
115   * @param s the literal string to be escaped
116   * @param index the index to start escaping from
117   * @return the escaped form of {@code string}
118   * @throws NullPointerException if {@code string} is null
119   */
120  protected String escapeSlow(String s, int index) {
121    int slen = s.length();
122
123    // Get a destination buffer and setup some loop variables.
124    char[] dest = Platform.charBufferFromThreadLocal();
125    int destSize = dest.length;
126    int destIndex = 0;
127    int lastEscape = 0;
128
129    // Loop through the rest of the string, replacing when needed into the
130    // destination buffer, which gets grown as needed as well.
131    for (; index < slen; index++) {
132
133      // Get a replacement for the current character.
134      char[] r = escape(s.charAt(index));
135
136      // If no replacement is needed, just continue.
137      if (r == null) continue;
138
139      int rlen = r.length;
140      int charsSkipped = index - lastEscape;
141
142      // This is the size needed to add the replacement, not the full size needed by the string. We
143      // only regrow when we absolutely must.
144      int sizeNeeded = destIndex + charsSkipped + rlen;
145      if (destSize < sizeNeeded) {
146        destSize = sizeNeeded + (slen - index) + DEST_PAD;
147        dest = growBuffer(dest, destIndex, destSize);
148      }
149
150      // If we have skipped any characters, we need to copy them now.
151      if (charsSkipped > 0) {
152        s.getChars(lastEscape, index, dest, destIndex);
153        destIndex += charsSkipped;
154      }
155
156      // Copy the replacement string into the dest buffer as needed.
157      if (rlen > 0) {
158        System.arraycopy(r, 0, dest, destIndex, rlen);
159        destIndex += rlen;
160      }
161      lastEscape = index + 1;
162    }
163
164    // Copy leftover characters if there are any.
165    int charsLeft = slen - lastEscape;
166    if (charsLeft > 0) {
167      int sizeNeeded = destIndex + charsLeft;
168      if (destSize < sizeNeeded) {
169
170        // Regrow and copy, expensive! No padding as this is the final copy.
171        dest = growBuffer(dest, destIndex, sizeNeeded);
172      }
173      s.getChars(lastEscape, slen, dest, destIndex);
174      destIndex = sizeNeeded;
175    }
176    return new String(dest, 0, destIndex);
177  }
178
179  /**
180   * Returns the escaped form of the given character, or {@code null} if this character does not
181   * need to be escaped. If an empty array is returned, this effectively strips the input character
182   * from the resulting text.
183   *
184   * <p>If the character does not need to be escaped, this method should return {@code null}, rather
185   * than a one-character array containing the character itself. This enables the escaping algorithm
186   * to perform more efficiently.
187   *
188   * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should
189   * not throw any exceptions.
190   *
191   * @param c the character to escape if necessary
192   * @return the replacement characters, or {@code null} if no escaping was needed
193   */
194  protected abstract char[] escape(char c);
195
196  /**
197   * Helper method to grow the character buffer as needed, this only happens once in a while so it's
198   * ok if it's in a method call. If the index passed in is 0 then no copying will be done.
199   */
200  private static char[] growBuffer(char[] dest, int index, int size) {
201    char[] copy = new char[size];
202    if (index > 0) {
203      System.arraycopy(dest, 0, copy, 0, index);
204    }
205    return copy;
206  }
207
208  /**
209   * The amount of padding to use when growing the escape buffer.
210   */
211  private static final int DEST_PAD = 32;
212}