/**
 * Copyright (c) 2006, Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
161bdbfefe4b144c7b031a1d9242a0fa061a0ae6b5Scott Kennedypackage com.google.android.mail.common.base;
17993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
181bdbfefe4b144c7b031a1d9242a0fa061a0ae6b5Scott Kennedyimport static com.google.android.mail.common.base.Preconditions.checkNotNull;
19993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
20993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereiraimport java.io.IOException;
21993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
/**
 * Utility functions for dealing with {@code CharEscaper}s, and some commonly
 * used {@code CharEscaper} instances.
 *
 * @author sven@google.com (Sven Mawson)
 * @author laurence@google.com (Laurence Gonsalves)
 */
29993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereirapublic final class CharEscapers {
30993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private CharEscapers() {}
31993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
// TODO(matevossian): To implementors of escapers --
//                    For each xxxEscaper method, please add links to the
//                    external reference pages that we consider authoritative
//                    for exactly what that escaper should be doing.
36993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
37993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
38993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Performs no escaping.
39993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
40993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper NULL_ESCAPER = new CharEscaper() {
41993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override
42993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    public String escape(String string) {
43993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        checkNotNull(string);
44993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return string;
45993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
46993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
47993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override
48993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      public Appendable escape(final Appendable out) {
49993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        checkNotNull(out);
50993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
51993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        // we can't simply return out because the CharEscaper contract says that
52993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        // the returned Appendable will throw a NullPointerException if asked to
53993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        // append null.
54993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return new Appendable() {
55993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            @Override public Appendable append(CharSequence csq) throws IOException {
56993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              checkNotNull(csq);
57993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              out.append(csq);
58993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              return this;
59993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            }
60993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
61993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            @Override public Appendable append(CharSequence csq, int start, int end)
62993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira                throws IOException {
63993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              checkNotNull(csq);
64993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              out.append(csq, start, end);
65993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              return this;
66993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            }
67993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
68993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            @Override public Appendable append(char c) throws IOException {
69993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              out.append(c);
70993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              return this;
71993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            }
72993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          };
73993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
74993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
75993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override
76993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      protected char[] escape(char c) {
77993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return null;
78993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
79993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
80993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
81993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
82993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} that does no escaping.
83993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
84993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper nullEscaper() {
85993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return NULL_ESCAPER;
86993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
87993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
88993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
89993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes special characters in a
90993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string so it can safely be included in an XML document in either element
91993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * content or attribute values.
92993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
93993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b></p>: silently removes null-characters and control
94993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters, as there is no way to represent them in XML.
95993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
96993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper xmlEscaper() {
97993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return XML_ESCAPER;
98993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
99993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
100993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
101993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escapes special characters from a string so it can safely be included in an
102993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * XML document in either element content or attribute values.  Also removes
103993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * null-characters and control characters, as there is no way to represent
104993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * them in XML.
105993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
106993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper XML_ESCAPER = newBasicXmlEscapeBuilder()
107993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('"', "&quot;")
108993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\'', "&apos;")
109993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .toEscaper();
110993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
111993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
112993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes special characters in a
113993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string so it can safely be included in an XML document in element content.
114993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
115993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b></p>: double and single quotes are not escaped, so it is not
116993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * safe to use this escaper to escape attribute values. Use the
117993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@link #xmlEscaper()} escaper to escape attribute values or if you are
118993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * unsure. Also silently removes non-whitespace control characters, as there
119993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * is no way to represent them in XML.
120993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
121993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper xmlContentEscaper() {
122993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return XML_CONTENT_ESCAPER;
123993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
124993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
125993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
126993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escapes special characters from a string so it can safely be included in an
127993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * XML document in element content.  Note that quotes are <em>not</em>
128993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * escaped, so <em>this is not safe for use in attribute values</em>. Use
129993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@link #XML_ESCAPER} for attribute values, or if you are unsure.  Also
130993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * removes non-whitespace control characters, as there is no way to represent
131993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * them in XML.
132993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
133993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper XML_CONTENT_ESCAPER =
134993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      newBasicXmlEscapeBuilder().toEscaper();
135993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
136993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
137993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes special characters in a
138993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string so it can safely be included in an HTML document in either element
139993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * content or attribute values.
140993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
141993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b></p>: alters non-ASCII and control characters.
142993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
143993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * The entity list was taken from:
144993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <a href="http://www.w3.org/TR/html4/sgml/entities.html">here</a>
145993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
146993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper htmlEscaper() {
147993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return HtmlEscaperHolder.HTML_ESCAPER;
148993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
149993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
150993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
151993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * A lazy initialization holder for HTML_ESCAPER.
152993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
153993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static class HtmlEscaperHolder {
154993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    private static final CharEscaper HTML_ESCAPER
155993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        = new HtmlCharEscaper(new CharEscaperBuilder()
156993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('"',      "&quot;")
157993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\'',     "&#39;")
158993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('&',      "&amp;")
159993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('<',      "&lt;")
160993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('>',      "&gt;")
161993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A0', "&nbsp;")
162993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A1', "&iexcl;")
163993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A2', "&cent;")
164993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A3', "&pound;")
165993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A4', "&curren;")
166993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A5', "&yen;")
167993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A6', "&brvbar;")
168993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A7', "&sect;")
169993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A8', "&uml;")
170993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00A9', "&copy;")
171993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00AA', "&ordf;")
172993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00AB', "&laquo;")
173993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00AC', "&not;")
174993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00AD', "&shy;")
175993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00AE', "&reg;")
176993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00AF', "&macr;")
177993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B0', "&deg;")
178993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B1', "&plusmn;")
179993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B2', "&sup2;")
180993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B3', "&sup3;")
181993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B4', "&acute;")
182993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B5', "&micro;")
183993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B6', "&para;")
184993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B7', "&middot;")
185993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B8', "&cedil;")
186993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00B9', "&sup1;")
187993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00BA', "&ordm;")
188993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00BB', "&raquo;")
189993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00BC', "&frac14;")
190993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00BD', "&frac12;")
191993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00BE', "&frac34;")
192993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00BF', "&iquest;")
193993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C0', "&Agrave;")
194993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C1', "&Aacute;")
195993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C2', "&Acirc;")
196993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C3', "&Atilde;")
197993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C4', "&Auml;")
198993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C5', "&Aring;")
199993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C6', "&AElig;")
200993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C7', "&Ccedil;")
201993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C8', "&Egrave;")
202993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00C9', "&Eacute;")
203993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00CA', "&Ecirc;")
204993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00CB', "&Euml;")
205993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00CC', "&Igrave;")
206993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00CD', "&Iacute;")
207993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00CE', "&Icirc;")
208993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00CF', "&Iuml;")
209993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D0', "&ETH;")
210993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D1', "&Ntilde;")
211993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D2', "&Ograve;")
212993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D3', "&Oacute;")
213993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D4', "&Ocirc;")
214993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D5', "&Otilde;")
215993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D6', "&Ouml;")
216993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D7', "&times;")
217993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D8', "&Oslash;")
218993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00D9', "&Ugrave;")
219993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00DA', "&Uacute;")
220993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00DB', "&Ucirc;")
221993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00DC', "&Uuml;")
222993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00DD', "&Yacute;")
223993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00DE', "&THORN;")
224993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00DF', "&szlig;")
225993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E0', "&agrave;")
226993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E1', "&aacute;")
227993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E2', "&acirc;")
228993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E3', "&atilde;")
229993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E4', "&auml;")
230993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E5', "&aring;")
231993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E6', "&aelig;")
232993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E7', "&ccedil;")
233993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E8', "&egrave;")
234993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00E9', "&eacute;")
235993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00EA', "&ecirc;")
236993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00EB', "&euml;")
237993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00EC', "&igrave;")
238993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00ED', "&iacute;")
239993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00EE', "&icirc;")
240993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00EF', "&iuml;")
241993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F0', "&eth;")
242993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F1', "&ntilde;")
243993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F2', "&ograve;")
244993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F3', "&oacute;")
245993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F4', "&ocirc;")
246993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F5', "&otilde;")
247993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F6', "&ouml;")
248993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F7', "&divide;")
249993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F8', "&oslash;")
250993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00F9', "&ugrave;")
251993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00FA', "&uacute;")
252993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00FB', "&ucirc;")
253993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00FC', "&uuml;")
254993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00FD', "&yacute;")
255993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00FE', "&thorn;")
256993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u00FF', "&yuml;")
257993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0152', "&OElig;")
258993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0153', "&oelig;")
259993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0160', "&Scaron;")
260993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0161', "&scaron;")
261993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0178', "&Yuml;")
262993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0192', "&fnof;")
263993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u02C6', "&circ;")
264993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u02DC', "&tilde;")
265993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0391', "&Alpha;")
266993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0392', "&Beta;")
267993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0393', "&Gamma;")
268993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0394', "&Delta;")
269993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0395', "&Epsilon;")
270993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0396', "&Zeta;")
271993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0397', "&Eta;")
272993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0398', "&Theta;")
273993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u0399', "&Iota;")
274993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u039A', "&Kappa;")
275993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u039B', "&Lambda;")
276993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u039C', "&Mu;")
277993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u039D', "&Nu;")
278993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u039E', "&Xi;")
279993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u039F', "&Omicron;")
280993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A0', "&Pi;")
281993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A1', "&Rho;")
282993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A3', "&Sigma;")
283993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A4', "&Tau;")
284993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A5', "&Upsilon;")
285993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A6', "&Phi;")
286993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A7', "&Chi;")
287993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A8', "&Psi;")
288993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03A9', "&Omega;")
289993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B1', "&alpha;")
290993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B2', "&beta;")
291993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B3', "&gamma;")
292993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B4', "&delta;")
293993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B5', "&epsilon;")
294993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B6', "&zeta;")
295993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B7', "&eta;")
296993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B8', "&theta;")
297993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03B9', "&iota;")
298993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03BA', "&kappa;")
299993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03BB', "&lambda;")
300993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03BC', "&mu;")
301993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03BD', "&nu;")
302993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03BE', "&xi;")
303993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03BF', "&omicron;")
304993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C0', "&pi;")
305993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C1', "&rho;")
306993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C2', "&sigmaf;")
307993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C3', "&sigma;")
308993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C4', "&tau;")
309993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C5', "&upsilon;")
310993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C6', "&phi;")
311993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C7', "&chi;")
312993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C8', "&psi;")
313993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03C9', "&omega;")
314993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03D1', "&thetasym;")
315993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03D2', "&upsih;")
316993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u03D6', "&piv;")
317993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2002', "&ensp;")
318993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2003', "&emsp;")
319993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2009', "&thinsp;")
320993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u200C', "&zwnj;")
321993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u200D', "&zwj;")
322993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u200E', "&lrm;")
323993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u200F', "&rlm;")
324993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2013', "&ndash;")
325993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2014', "&mdash;")
326993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2018', "&lsquo;")
327993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2019', "&rsquo;")
328993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u201A', "&sbquo;")
329993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u201C', "&ldquo;")
330993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u201D', "&rdquo;")
331993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u201E', "&bdquo;")
332993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2020', "&dagger;")
333993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2021', "&Dagger;")
334993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2022', "&bull;")
335993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2026', "&hellip;")
336993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2030', "&permil;")
337993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2032', "&prime;")
338993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2033', "&Prime;")
339993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2039', "&lsaquo;")
340993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u203A', "&rsaquo;")
341993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u203E', "&oline;")
342993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2044', "&frasl;")
343993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u20AC', "&euro;")
344993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2111', "&image;")
345993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2118', "&weierp;")
346993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u211C', "&real;")
347993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2122', "&trade;")
348993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2135', "&alefsym;")
349993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2190', "&larr;")
350993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2191', "&uarr;")
351993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2192', "&rarr;")
352993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2193', "&darr;")
353993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2194', "&harr;")
354993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u21B5', "&crarr;")
355993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u21D0', "&lArr;")
356993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u21D1', "&uArr;")
357993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u21D2', "&rArr;")
358993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u21D3', "&dArr;")
359993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u21D4', "&hArr;")
360993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2200', "&forall;")
361993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2202', "&part;")
362993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2203', "&exist;")
363993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2205', "&empty;")
364993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2207', "&nabla;")
365993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2208', "&isin;")
366993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2209', "&notin;")
367993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u220B', "&ni;")
368993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u220F', "&prod;")
369993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2211', "&sum;")
370993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2212', "&minus;")
371993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2217', "&lowast;")
372993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u221A', "&radic;")
373993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u221D', "&prop;")
374993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u221E', "&infin;")
375993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2220', "&ang;")
376993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2227', "&and;")
377993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2228', "&or;")
378993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2229', "&cap;")
379993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u222A', "&cup;")
380993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u222B', "&int;")
381993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2234', "&there4;")
382993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u223C', "&sim;")
383993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2245', "&cong;")
384993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2248', "&asymp;")
385993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2260', "&ne;")
386993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2261', "&equiv;")
387993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2264', "&le;")
388993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2265', "&ge;")
389993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2282', "&sub;")
390993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2283', "&sup;")
391993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2284', "&nsub;")
392993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2286', "&sube;")
393993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2287', "&supe;")
394993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2295', "&oplus;")
395993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2297', "&otimes;")
396993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u22A5', "&perp;")
397993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u22C5', "&sdot;")
398993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2308', "&lceil;")
399993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2309', "&rceil;")
400993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u230A', "&lfloor;")
401993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u230B', "&rfloor;")
402993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2329', "&lang;")
403993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u232A', "&rang;")
404993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u25CA', "&loz;")
405993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2660', "&spades;")
406993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2663', "&clubs;")
407993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2665', "&hearts;")
408993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .addEscape('\u2666', "&diams;")
409993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            .toArray());
410993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
411993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
412993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
413993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes special characters in a
414993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string so it can safely be included in an HTML document in either element
415993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * content or attribute values.
416993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
417993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b></p>: does not alter non-ASCII and control characters.
418993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
419993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper asciiHtmlEscaper() {
420993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return ASCII_HTML_ESCAPER;
421993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
422993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
423993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
424993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escapes special characters from a string so it can safely be included in an
425993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * HTML document in either element content or attribute values. Does
426993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <em>not</em> alter non-ASCII characters or control characters.
427993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
428993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper ASCII_HTML_ESCAPER = new CharEscaperBuilder()
429993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('"', "&quot;")
430993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\'', "&#39;")
431993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('&', "&amp;")
432993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('<', "&lt;")
433993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('>', "&gt;")
434993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .toEscaper();
435993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
436993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
437993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns an {@link Escaper} instance that escapes Java chars so they can be
438993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * safely included in URIs. For details on escaping URIs, see section 2.4 of
439993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
440993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
441993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>When encoding a String, the following rules apply:
442993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
443993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
444993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     through "9" remain the same.
445993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The special characters ".", "-", "*", and "_" remain the same.
446993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The space character " " is converted into a plus sign "+".
447993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>All other characters are converted into one or more bytes using UTF-8
448993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     encoding and each byte is then represented by the 3-character string
449993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
450993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     representation of the byte value.
451993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
452993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
453993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase
454993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
455993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * RFC 3986</a>:<br>
456993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <i>"URI producers and normalizers should use uppercase hexadecimal digits
457993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * for all percent-encodings."</i>
458993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
459993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>This escaper has identical behavior to (but is potentially much faster
460993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * than):
461993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
462993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>{@link com.google.httputil.FastURLEncoder#encode(String)}
463993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>{@link com.google.httputil.FastURLEncoder#encode(String,String)}
464993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     with the encoding name "UTF-8"
465993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>{@link java.net.URLEncoder#encode(String, String)}
466993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     with the encoding name "UTF-8"
467993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * </ul>
468993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
469993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>This method is equivalent to {@code uriEscaper(true)}.
470993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
471993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static Escaper uriEscaper() {
472993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return uriEscaper(true);
473993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
474993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
475993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
476993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns an {@link Escaper} instance that escapes Java chars so they can be
477993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * safely included in URI path segments. For details on escaping URIs, see
478993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * section 2.4 of <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
479993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
480993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>When encoding a String, the following rules apply:
481993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
482993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
483993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     through "9" remain the same.
484993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The unreserved characters ".", "-", "~", and "_" remain the same.
485993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The general delimiters "@" and ":" remain the same.
486993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";",
487993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     and "=" remain the same.
488993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The space character " " is converted into %20.
489993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>All other characters are converted into one or more bytes using UTF-8
490993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     encoding and each byte is then represented by the 3-character string
491993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
492993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     representation of the byte value.
493993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * </ul>
494993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
495993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase
496993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
497993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * RFC 3986</a>:<br>
498993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <i>"URI producers and normalizers should use uppercase hexadecimal digits
499993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * for all percent-encodings."</i>
500993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
501993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static Escaper uriPathEscaper() {
502993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return URI_PATH_ESCAPER;
503993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
504993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
505993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
506993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns an {@link Escaper} instance that escapes Java chars so they can be
507993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * safely included in URI query string segments. When the query string
508993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * consists of a sequence of name=value pairs separated by &amp;, the names
509993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * and values should be individually encoded. If you escape an entire query
510993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string in one pass with this escaper, then the "=" and "&amp;" characters
511993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * used as separators will also be escaped.
512993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
513993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>This escaper is also suitable for escaping fragment identifiers.
514993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
515993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>For details on escaping URIs, see
516993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * section 2.4 of <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
517993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
518993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>When encoding a String, the following rules apply:
519993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
520993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
521993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     through "9" remain the same.
522993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The unreserved characters ".", "-", "~", and "_" remain the same.
523993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The general delimiters "@" and ":" remain the same.
524993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The path delimiters "/" and "?" remain the same.
525993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The subdelimiters "!", "$", "'", "(", ")", "*", ",", and ";",
526993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     remain the same.
527993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The space character " " is converted into %20.
528993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The equals sign "=" is converted into %3D.
529993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The ampersand "&amp;" is converted into %26.
530993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>All other characters are converted into one or more bytes using UTF-8
531993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     encoding and each byte is then represented by the 3-character string
532993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
533993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     representation of the byte value.
534993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * </ul>
535993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
536993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase
537993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
538993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * RFC 3986</a>:<br>
539993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <i>"URI producers and normalizers should use uppercase hexadecimal digits
540993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * for all percent-encodings."</i>
541993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
542993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>This method is equivalent to {@code uriQueryStringEscaper(false)}.
543993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
544993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static Escaper uriQueryStringEscaper() {
545993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return uriQueryStringEscaper(false);
546993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
547993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
548993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
549993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link Escaper} instance that escapes Java characters so they can
550993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * be safely included in URIs. For details on escaping URIs, see section 2.4
551993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * of <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
552993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
553993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>When encoding a String, the following rules apply:
554993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
555993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
556993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     through "9" remain the same.
557993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The special characters ".", "-", "*", and "_" remain the same.
558993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>If {@code plusForSpace} was specified, the space character " " is
559993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     converted into a plus sign "+". Otherwise it is converted into "%20".
560993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>All other characters are converted into one or more bytes using UTF-8
561993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     encoding and each byte is then represented by the 3-character string
562993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
563993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     representation of the byte value.
564993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * </ul>
565993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
566993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase
567993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
568993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * RFC 3986</a>:<br>
569993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <i>"URI producers and normalizers should use uppercase hexadecimal digits
570993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * for all percent-encodings."</i>
571993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
572993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param plusForSpace if {@code true} space is escaped to {@code +} otherwise
573993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        it is escaped to {@code %20}. Although common, the escaping of
574993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        spaces as plus signs has a very ambiguous status in the relevant
575993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        specifications. You should prefer {@code %20} unless you are doing
576993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        exact character-by-character comparisons of URLs and backwards
577993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        compatibility requires you to use plus signs.
578993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
579993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @see #uriEscaper()
580993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
581993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static Escaper uriEscaper(boolean plusForSpace) {
582993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return plusForSpace ? URI_ESCAPER : URI_ESCAPER_NO_PLUS;
583993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
584993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
585993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
586993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns an {@link Escaper} instance that escapes Java chars so they can be
587993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * safely included in URI query string segments. When the query string
588993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * consists of a sequence of name=value pairs separated by &amp;, the names
589993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * and values should be individually encoded. If you escape an entire query
590993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string in one pass with this escaper, then the "=" and "&amp;" characters
591993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * used as separators will also be escaped.
592993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
593993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>This escaper is also suitable for escaping fragment identifiers.
594993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
595993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>For details on escaping URIs, see
596993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * section 2.4 of <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
597993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
598993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>When encoding a String, the following rules apply:
599993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
600993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
601993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     through "9" remain the same.
602993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The unreserved characters ".", "-", "~", and "_" remain the same.
603993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The general delimiters "@" and ":" remain the same.
604993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The path delimiters "/" and "?" remain the same.
605993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The subdelimiters "!", "$", "'", "(", ")", "*", ",", and ";",
606993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     remain the same.
607993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>If {@code plusForSpace} was specified, the space character " " is
608993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     converted into a plus sign "+". Otherwise it is converted into "%20".
609993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The equals sign "=" is converted into %3D.
610993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The ampersand "&amp;" is converted into %26.
611993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>All other characters are converted into one or more bytes using UTF-8
612993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     encoding and each byte is then represented by the 3-character string
613993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
614993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     representation of the byte value.
615993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * </ul>
616993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
617993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase
618993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
619993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * RFC 3986</a>:<br>
620993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <i>"URI producers and normalizers should use uppercase hexadecimal digits
621993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * for all percent-encodings."</i>
622993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
623993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param plusForSpace if {@code true} space is escaped to {@code +} otherwise
624993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        it is escaped to {@code %20}. Although common, the escaping of
625993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        spaces as plus signs has a very ambiguous status in the relevant
626993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        specifications. You should prefer {@code %20} unless you are doing
627993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        exact character-by-character comparisons of URLs and backwards
628993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *        compatibility requires you to use plus signs.
629993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
630993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @see #uriQueryStringEscaper()
631993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
632993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static Escaper uriQueryStringEscaper(boolean plusForSpace) {
633993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return plusForSpace ?
634993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira           URI_QUERY_STRING_ESCAPER_WITH_PLUS : URI_QUERY_STRING_ESCAPER;
635993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
636993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
637993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final Escaper URI_ESCAPER =
638993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      new PercentEscaper(PercentEscaper.SAFECHARS_URLENCODER, true);
639993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
640993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final Escaper URI_ESCAPER_NO_PLUS =
641993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      new PercentEscaper(PercentEscaper.SAFECHARS_URLENCODER, false);
642993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
643993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final Escaper URI_PATH_ESCAPER =
644993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      new PercentEscaper(PercentEscaper.SAFEPATHCHARS_URLENCODER, false);
645993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
646993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final Escaper URI_QUERY_STRING_ESCAPER =
647993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      new PercentEscaper(PercentEscaper.SAFEQUERYSTRINGCHARS_URLENCODER, false);
648993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
649993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final Escaper URI_QUERY_STRING_ESCAPER_WITH_PLUS =
650993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      new PercentEscaper(PercentEscaper.SAFEQUERYSTRINGCHARS_URLENCODER, true);
651993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
652993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
653993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link Escaper} instance that escapes Java characters in a manner
654993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * compatible with the C++ webutil/url URL class (the {@code kGoogle1Escape}
655993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * set).
656993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
657993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>When encoding a String, the following rules apply:
658993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <ul>
659993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"
660993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * through "9" remain the same.
661993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The special characters "!", "(", ")", "*", "-", ".", "_", "~", ",", "/"
662993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * and ":" remain the same.
663993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>The space character " " is converted into a plus sign "+".
664993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <li>All other characters are converted into one or more bytes using UTF-8
665993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     encoding and each byte is then represented by the 3-character string
666993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     "%XY", where "XY" is the two-digit, uppercase, hexadecimal
667993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     representation of the byte value.
668993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * </ul>
669993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
670993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase
671993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * hexadecimal sequences. From <a href="http://www.ietf.org/rfc/rfc3986.txt">
672993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * RFC 3986</a>:<br>
673993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <i>"URI producers and normalizers should use uppercase hexadecimal digits
674993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * for all percent-encodings."</i>
675993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
676993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b>: This escaper is a special case and is <em>not
677993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * compliant</em> with <a href="http://www.ietf.org/rfc/rfc2396.txt">
678993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * RFC 2396</a>. Specifically it will not escape "/", ":" and ",". This is
679993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * only provided for certain limited use cases and you should favor using
680993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@link #uriEscaper()} whenever possible.
681993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
682993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static Escaper cppUriEscaper() {
683993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return CPP_URI_ESCAPER;
684993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
685993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
686993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Based on comments from FastURLEncoder:
687993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // These octets mimic the ones escaped by the C++ webutil/url URL class --
688993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // the kGoogle1Escape set.
689993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // To produce the same escaping as C++, use this set with the plusForSpace
690993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // option.
691993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // WARNING: Contrary to RFC 2396 ",", "/" and ":" are listed as safe here.
692993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final Escaper CPP_URI_ESCAPER =
693993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      new PercentEscaper("!()*-._~,/:", true);
694993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
695993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
696993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes special characters in a
697993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string so it can safely be included in a Java string literal.
698993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
699993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note</b></p>: does not escape single quotes, so use the escaper
700993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * returned by {@link #javaCharEscaper()} if you are generating char
701993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * literals or if you are unsure.
702993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
703993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper javaStringEscaper() {
704993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return JAVA_STRING_ESCAPER;
705993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
706993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
707993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
708993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escapes special characters from a string so it can safely be included in a
709993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Java string literal. Does <em>not</em> escape single-quotes, so use
710993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * JAVA_CHAR_ESCAPE if you are generating char literals, or if you are unsure.
711993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
712993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>Note that non-ASCII characters will be octal or Unicode escaped.
713993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
714993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper JAVA_STRING_ESCAPER
715993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      = new JavaCharEscaper(new CharEscaperBuilder()
716993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\b', "\\b")
717993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\f', "\\f")
718993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\n', "\\n")
719993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\r', "\\r")
720993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\t', "\\t")
721993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\"', "\\\"")
722993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\\', "\\\\")
723993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .toArray());
724993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
725993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
726993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes special characters in a
727993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string so it can safely be included in a Java char or string literal. The
728993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * behavior of this escaper is the same as that of the
729993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@link #javaStringEscaper()}, except it also escapes single quotes.
730993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
731993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper javaCharEscaper() {
732993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return JAVA_CHAR_ESCAPER;
733993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
734993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
735993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
736993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escapes special characters from a string so it can safely be included in a
737993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Java char literal or string literal.
738993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
739993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>Note that non-ASCII characters will be octal or Unicode escaped.
740993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
741993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>This is the same as {@link #JAVA_STRING_ESCAPER}, except that it escapes
742993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * single quotes.
743993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
744993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper JAVA_CHAR_ESCAPER
745993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      = new JavaCharEscaper(new CharEscaperBuilder()
746993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\b', "\\b")
747993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\f', "\\f")
748993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\n', "\\n")
749993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\r', "\\r")
750993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\t', "\\t")
751993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\'', "\\'")
752993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\"', "\\\"")
753993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\\', "\\\\")
754993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .toArray());
755993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
756993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
757993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that replaces non-ASCII characters
758993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * in a string with their Unicode escape sequences ({@code \\uxxxx} where
759993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@code xxxx} is a hex number). Existing escape sequences won't be affected.
760993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
761993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper javaStringUnicodeEscaper() {
762993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return JAVA_STRING_UNICODE_ESCAPER;
763993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
764993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
765993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
766993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escapes each non-ASCII character in with its Unicode escape sequence
767993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@code \\uxxxx} where {@code xxxx} is a hex number. Existing escape
768993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * sequences won't be affected.
769993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
770993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper JAVA_STRING_UNICODE_ESCAPER
771993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      = new CharEscaper() {
772993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          @Override protected char[] escape(char c) {
773993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            if (c <= 127) {
774993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira              return null;
775993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            }
776993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
777993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            char[] r = new char[6];
778993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            r[5] = HEX_DIGITS[c & 15];
779993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            c >>>= 4;
780993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            r[4] = HEX_DIGITS[c & 15];
781993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            c >>>= 4;
782993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            r[3] = HEX_DIGITS[c & 15];
783993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            c >>>= 4;
784993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            r[2] = HEX_DIGITS[c & 15];
785993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            r[1] = 'u';
786993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            r[0] = '\\';
787993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            return r;
788993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          }
789993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        };
790993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
791993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
792993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes special characters from
793993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * a string so it can safely be included in a Python string literal. Does not
794993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * have any special handling for non-ASCII characters.
795993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
796993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper pythonEscaper() {
797993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return PYTHON_ESCAPER;
798993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
799993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
800993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
801993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escapes special characters in a string so it can safely be included in a
802993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Python string literal. Does not have any special handling for non-ASCII
803993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters.
804993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
805993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper PYTHON_ESCAPER = new CharEscaperBuilder()
806993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // TODO(laurence): perhaps this should escape non-ASCII characters?
807993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\n', "\\n")
808993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\r', "\\r")
809993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\t', "\\t")
810993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\\', "\\\\")
811993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\"', "\\\"")
812993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .addEscape('\'', "\\\'")
813993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .toEscaper();
814993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
815993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
816993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@link CharEscaper} instance that escapes non-ASCII characters in
817993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * a string so it can safely be included in a Javascript string literal.
818993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Non-ASCII characters are replaced with their ASCII javascript escape
819993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * sequences (e.g., \\uhhhh or \xhh).
820993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
821993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper javascriptEscaper() {
822993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return JAVASCRIPT_ESCAPER;
823993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
824993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
825993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
826993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@code CharEscaper} to escape javascript strings. Turns all non-ASCII
827993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters into ASCII javascript escape sequences (e.g., \\uhhhh or \xhh).
828993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
829993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final CharEscaper JAVASCRIPT_ESCAPER
830993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      = new JavascriptCharEscaper(new CharEscaperBuilder()
831993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\'', "\\x27")
832993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('"',  "\\x22")
833993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('<',  "\\x3c")
834993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('=',  "\\x3d")
835993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('>',  "\\x3e")
836993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('&',  "\\x26")
837993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\b', "\\b")
838993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\t', "\\t")
839993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\n', "\\n")
840993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\f', "\\f")
841993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\r', "\\r")
842993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .addEscape('\\', "\\\\")
843993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .toArray());
844993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
845993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static CharEscaperBuilder newBasicXmlEscapeBuilder() {
846993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharEscaperBuilder()
847993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        .addEscape('&', "&amp;")
848993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        .addEscape('<', "&lt;")
849993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        .addEscape('>', "&gt;")
850993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        .addEscapes(new char[] {
851993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            '\000', '\001', '\002', '\003', '\004',
852993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            '\005', '\006', '\007', '\010', '\013',
853993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            '\014', '\016', '\017', '\020', '\021',
854993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            '\022', '\023', '\024', '\025', '\026',
855993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            '\027', '\030', '\031', '\032', '\033',
856993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            '\034', '\035', '\036', '\037'}, "");
857993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
858993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
859993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
860993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a composite {@link CharEscaper} instance that tries to escape
861993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters using a primary {@code CharEscaper} first and falls back to a
862993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * secondary one if there is no escaping.
863993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
864993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The returned escaper will attempt to escape each character using the
865993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * primary escaper, and if the primary escaper has no escaping for that
866993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * character, it will use the secondary escaper. If the secondary escaper has
867993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * no escaping for a character either, the original character will be used.
868993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * If the primary escaper has an escape for a character, the secondary escaper
869993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * will not be used at all for that character; the escaped output of the
870993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * primary is not run through the secondary. For a case where you would like
871993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * to first escape with one escaper, and then with another, it is recommended
872993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * that you call each escaper in order.
873993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
874993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param primary The primary {@code CharEscaper} to use
875993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param secondary The secondary {@code CharEscaper} to use if the first one
876993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     has no escaping rule for a character
877993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @throws NullPointerException if any of the arguments is null
878993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
879993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharEscaper fallThrough(CharEscaper primary,
880993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      CharEscaper secondary) {
881993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    checkNotNull(primary);
882993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    checkNotNull(secondary);
883993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new FallThroughCharEscaper(primary, secondary);
884993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
885993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
886993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
887993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * A fast {@link CharEscaper} that uses an array of replacement characters and
888993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * a range of safe characters. It overrides {@link #escape(String)} to improve
889993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * performance. Rough benchmarking shows that this almost doubles the speed
890993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * when processing strings that do not require escaping (providing the escape
891993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * test itself is efficient).
892993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
893993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static abstract class FastCharEscaper extends CharEscaper {
894993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
895993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    protected final char[][] replacements;
896993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    protected final int replacementLength;
897993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    protected final char safeMin;
898993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    protected final char safeMax;
899993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
900993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    public FastCharEscaper(char[][] replacements, char safeMin, char safeMax) {
901993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.replacements = replacements;
902993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.replacementLength = replacements.length;
903993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.safeMin = safeMin;
904993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.safeMax = safeMax;
905993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
906993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
907993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    /** Overridden for performance (see {@link FastCharEscaper}). */
908993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String escape(String s) {
909993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      int slen = s.length();
910993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      for (int index = 0; index < slen; index++) {
911993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        char c = s.charAt(index);
912993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if ((c < replacementLength && replacements[c] != null)
913993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            || c < safeMin || c > safeMax) {
914993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          return escapeSlow(s, index);
915993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
916993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
917993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return s;
918993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
919993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
920993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
921993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
922993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escaper for Java character escaping, contains both an array and a
923993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * backup function.  We're not overriding the array decorator because we
924993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * want to keep this as fast as possible, so no calls to super.escape first.
925993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
926993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static class JavaCharEscaper extends FastCharEscaper {
927993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
928993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    public JavaCharEscaper(char[][] replacements) {
929993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      super(replacements, ' ', '~');
930993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
931993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
932993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override protected char[] escape(char c) {
933993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // First check if our array has a valid escaping.
934993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c < replacementLength) {
935993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        char[] r = replacements[c];
936993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (r != null) {
937993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          return r;
938993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
939993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
940993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
941993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // This range is un-escaped.
942993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (safeMin <= c && c <= safeMax) {
943993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return null;
944993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
945993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
946993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c <= 0xFF) {
947993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        // Convert c to an octal-escaped string.
948993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        // Equivalent to String.format("\\%03o", (int)c);
949993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        char[] r = new char[4];
950993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[0] = '\\';
951993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[3] = HEX_DIGITS[c & 7];
952993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        c >>>= 3;
953993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[2] = HEX_DIGITS[c & 7];
954993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        c >>>= 3;
955993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[1] = HEX_DIGITS[c & 7];
956993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return r;
957993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
958993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
959993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // Convert c to a hex-escaped string.
960993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // Equivalent to String.format("\\u%04x", (int)c);
961993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      char[] r = new char[6];
962993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[0] = '\\';
963993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[1] = 'u';
964993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[5] = HEX_DIGITS[c & 15];
965993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      c >>>= 4;
966993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[4] = HEX_DIGITS[c & 15];
967993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      c >>>= 4;
968993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[3] = HEX_DIGITS[c & 15];
969993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      c >>>= 4;
970993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[2] = HEX_DIGITS[c & 15];
971993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return r;
972993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
973993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
974993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
975993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
976993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escaper for javascript character escaping, contains both an array and a
977993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * backup function. We're not overriding the array decorator because we
978993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * want to keep this as fast as possible, so no calls to super.escape first.
979993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
980993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static class JavascriptCharEscaper extends FastCharEscaper {
981993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
982993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    public JavascriptCharEscaper(char[][] replacements) {
983993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      super(replacements, ' ', '~');
984993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
985993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
986993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override protected char[] escape(char c) {
987993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // First check if our array has a valid escaping.
988993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c < replacementLength) {
989993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        char[] r = replacements[c];
990993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (r != null) {
991993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          return r;
992993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
993993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
994993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
995993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // This range is unescaped.
996993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (safeMin <= c && c <= safeMax) {
997993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return null;
998993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
999993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1000993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // we can do a 2 digit hex escape for chars less that 0x100
1001993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c < 0x100) {
1002993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        char[] r = new char[4];
1003993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[3] = HEX_DIGITS[c & 0xf];
1004993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        c >>>= 4;
1005993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[2] = HEX_DIGITS[c & 0xf];
1006993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[1] = 'x';
1007993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        r[0] = '\\';
1008993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return r;
1009993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1010993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1011993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // 4 digit hex escape everything else
1012993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      char[] r = new char[6];
1013993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[5] = HEX_DIGITS[c & 0xf];
1014993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      c >>>= 4;
1015993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[4] = HEX_DIGITS[c & 0xf];
1016993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      c >>>= 4;
1017993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[3] = HEX_DIGITS[c & 0xf];
1018993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      c >>>= 4;
1019993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[2] = HEX_DIGITS[c & 0xf];
1020993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[1] = 'u';
1021993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      r[0] = '\\';
1022993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return r;
1023993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1024993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1025993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1026993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
1027993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Escaper for HTML character escaping, contains both an array and a
1028993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * backup function.  We're not overriding the array decorator because we
1029993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * want to keep this as fast as possible, so no calls to super.escape first.
1030993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
1031993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static class HtmlCharEscaper extends FastCharEscaper {
1032993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1033993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    public HtmlCharEscaper(char[][] replacements) {
1034993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      super(replacements, Character.MIN_VALUE, '~');
1035993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1036993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1037993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override protected char[] escape(char c) {
1038993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // First check if our array has a valid escaping.
1039993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c < replacementLength) {
1040993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        char[] r = replacements[c];
1041993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (r != null) {
1042993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          return r;
1043993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
1044993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1045993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1046993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // ~ is ASCII 126, the highest value char that does not need
1047993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // to be escaped
1048993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c <= safeMax) {
1049993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return null;
1050993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1051993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1052993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      int index;
1053993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c < 1000) {
1054993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        index = 4;
1055993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      } else if (c < 10000) {
1056993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        index = 5;
1057993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      } else {
1058993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        index = 6;
1059993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1060993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      char[] result = new char[index + 2];
1061993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      result[0] = '&';
1062993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      result[1] = '#';
1063993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      result[index + 1] = ';';
1064993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1065993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // TODO(sven): Convert this to a sequence of shifts/additions
1066993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // to avoid the division and modulo operators.
1067993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      int intValue = c;
1068993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      for (; index > 1; index--) {
1069993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        result[index] = HEX_DIGITS[intValue % 10];
1070993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        intValue /= 10;
1071993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1072993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return result;
1073993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1074993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1075993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1076993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
1077993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * A composite {@code CharEscaper} object that tries to escape characters
1078993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * using a primary {@code CharEscaper} first and falls back to a secondary
1079993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * one if there is no escaping.
1080993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
1081993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static class FallThroughCharEscaper extends CharEscaper {
1082993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1083993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    private final CharEscaper primary;
1084993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    private final CharEscaper secondary;
1085993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1086993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    public FallThroughCharEscaper(CharEscaper primary, CharEscaper secondary) {
1087993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.primary = primary;
1088993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.secondary = secondary;
1089993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1090993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1091993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override
1092993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    protected char[] escape(char c) {
1093993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      char result[] = primary.escape(c);
1094993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (result == null) {
1095993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        result = secondary.escape(c);
1096993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1097993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return result;
1098993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1099993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1100993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1101993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();
1102993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira}