/*
 * Copyright (C) 2008 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
171bdbfefe4b144c7b031a1d9242a0fa061a0ae6b5Scott Kennedypackage com.google.android.mail.common.base;
18993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
191bdbfefe4b144c7b031a1d9242a0fa061a0ae6b5Scott Kennedyimport static com.google.android.mail.common.base.Preconditions.checkArgument;
201bdbfefe4b144c7b031a1d9242a0fa061a0ae6b5Scott Kennedyimport static com.google.android.mail.common.base.Preconditions.checkNotNull;
21993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
22993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereiraimport java.util.ArrayList;
23993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereiraimport java.util.Arrays;
24993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereiraimport java.util.List;
25993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
/**
 * Determines a true or false value for any Java {@code char} value, just as
 * {@link Predicate} does for any {@link Object}. Also offers basic text
 * processing methods based on this function. Implementations are strongly
 * encouraged to be side-effect-free and immutable.
 *
 * <p>Throughout the documentation of this class, the phrase "matching
 * character" is used to mean "any character {@code c} for which {@code
 * this.matches(c)} returns {@code true}".
 *
 * <p><b>Note:</b> This class deals only with {@code char} values; it does not
 * understand supplementary Unicode code points in the range {@code 0x10000} to
 * {@code 0x10FFFF}. Such logical characters are encoded into a {@code String}
 * using surrogate pairs, and a {@code CharMatcher} treats these just as two
 * separate characters.
 *
 * @author Kevin Bourrillion
 * @since 2009.09.15 <b>tentative</b>
 */
45993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereirapublic abstract class CharMatcher implements Predicate<Character> {
46993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
47993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Constants
48993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
49993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Excludes 2000-2000a, which is handled as a range
50993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final String BREAKING_WHITESPACE_CHARS =
51993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
52993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
53993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Excludes 2007, which is handled as a gap in a pair of ranges
54993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static final String NON_BREAKING_WHITESPACE_CHARS =
55993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      "\u00a0\u180e\u202f";
56993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
57993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
58993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is whitespace according to the latest
59993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Unicode standard, as illustrated
60993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
61993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * This is not the same definition used by other Java APIs. See a comparison
62993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * of several definitions of "whitespace" at
63993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <a href="TODO">(TODO)</a>.
64993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
65993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this
66993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * constant to keep it up to date.
67993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
68993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher WHITESPACE =
69993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
70993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .or(inRange('\u2000', '\u200a'));
71993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
72993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
73993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is a breaking whitespace (that is,
74993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * a whitespace which can be interpreted as a break between words
75993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * for formatting purposes).  See {@link #WHITESPACE} for a discussion
76993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * of that term.
77993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
78993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @since 2010.01.04 <b>tentative</b>
79993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
80993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher BREAKING_WHITESPACE =
81993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      anyOf(BREAKING_WHITESPACE_CHARS)
82993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .or(inRange('\u2000', '\u2006'))
83993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          .or(inRange('\u2008', '\u200a'));
84993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
85993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
86993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is ASCII, meaning that its code point is
87993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * less than 128.
88993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
89993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher ASCII = inRange('\0', '\u007f');
90993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
91993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
92993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is a digit according to
93993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
94993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
95993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher DIGIT;
96993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
97993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  static {
98993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    CharMatcher digit = inRange('0', '9');
99993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    String zeroes =
100993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
101993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
102993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
103993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (char base : zeroes.toCharArray()) {
104993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      digit = digit.or(inRange(base, (char) (base + 9)));
105993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
106993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    DIGIT = digit;
107993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
108993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
109993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
110993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is whitespace according to {@link
111993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Character#isWhitespace(char) Java's definition}; it is usually preferable
112993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * to use {@link #WHITESPACE}. See a comparison of several definitions of
113993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * "whitespace" at <a href="http://go/white+space">go/white+space</a>.
114993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
115993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher JAVA_WHITESPACE
116993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      = inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
117993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u001c', '\u0020'))
118993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u1680'))
119993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u180e'))
120993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u2000', '\u2006'))
121993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u2008', '\u200b'))
122993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u2028', '\u2029'))
123993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u205f'))
124993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u3000'));
125993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
126993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
127993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is a digit according to {@link
128993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Character#isDigit(char) Java's definition}. If you only care to match
129993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ASCII digits, you can use {@code inRange('0', '9')}.
130993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
131993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
132993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
133993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return Character.isDigit(c);
134993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
135993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  };
136993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
137993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
138993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is a letter according to {@link
139993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Character#isLetter(char) Java's definition}. If you only care to match
140993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * letters of the Latin alphabet, you can use {@code
141993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * inRange('a', 'z').or(inRange('A', 'Z'))}.
142993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
143993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher JAVA_LETTER = new CharMatcher() {
144993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
145993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return Character.isLetter(c);
146993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
147993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  };
148993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
149993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
150993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is a letter or digit according to {@link
151993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Character#isLetterOrDigit(char) Java's definition}.
152993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
153993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
154993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
155993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return Character.isLetterOrDigit(c);
156993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
157993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  };
158993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
159993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
160993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is upper case according to {@link
161993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Character#isUpperCase(char) Java's definition}.
162993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
163993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
164993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
165993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return Character.isUpperCase(c);
166993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
167993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  };
168993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
169993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
170993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is lower case according to {@link
171993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Character#isLowerCase(char) Java's definition}.
172993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
173993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
174993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
175993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return Character.isLowerCase(c);
176993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
177993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  };
178993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
179993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
180993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is an ISO control character according to
181993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@link Character#isISOControl(char)}.
182993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
183993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher JAVA_ISO_CONTROL = inRange('\u0000', '\u001f')
184993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u007f', '\u009f'));
185993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
186993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
187993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is invisible; that is, if its Unicode
188993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * category is any of SPACE_SEPARATOR, LINE_SEPARATOR,
189993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according
190993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * to ICU4J.
191993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
192993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
193993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u007f', '\u00a0'))
194993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u00ad'))
195993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u0600', '\u0603'))
196993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
197993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u2000', '\u200f'))
198993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u2028', '\u202f'))
199993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u205f', '\u2064'))
200993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u206a', '\u206f'))
201993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u3000'))
202993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\ud800', '\uf8ff'))
203993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(anyOf("\ufeff\ufff9\ufffa\ufffb"));
204993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
205993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
206993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is single-width (not double-width).  When
207993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * in doubt, this matcher errs on the side of returning {@code false} (that
208993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * is, it tends to assume a character is double-width).
209993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
210993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <b>Note:</b> as the reference file evolves, we will modify this constant
211993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * to keep it up to date.
212993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
213993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
214993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u05be'))
215993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u05d0', '\u05ea'))
216993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u05f3'))
217993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(is('\u05f4'))
218993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u0600', '\u06ff'))
219993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u0750', '\u077f'))
220993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u0e00', '\u0e7f'))
221993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u1e00', '\u20af'))
222993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\u2100', '\u213a'))
223993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\ufb50', '\ufdff'))
224993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\ufe70', '\ufeff'))
225993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      .or(inRange('\uff61', '\uffdc'));
226993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
227993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
228993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Determines whether a character is whitespace according to an arbitrary definition used by
229993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@link StringUtil} for years. Most likely you don't want to use this. See a comparison of
230993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * several definitions of "whitespace" at <a href="http://goto/white space">goto/white space</a>.
231993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
232993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>To be deprecated.</b> use {@link #WHITESPACE} to switch to the Unicode definition, or
233993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * create a matcher for the specific characters you want. Not deprecated yet because it is a
234993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * stepping stone for getting off of many deprecated {@link StringUtil} methods.
235993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
236993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  @Deprecated
237993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher LEGACY_WHITESPACE =
238993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      anyOf(" \r\n\t\u3000\u00A0\u2007\u202F").precomputed();
239993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
240993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
241993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /** Matches any character. */
242993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher ANY = new CharMatcher() {
243993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
244993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return true;
245993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
246993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
247993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int indexIn(CharSequence sequence) {
248993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return (sequence.length() == 0) ? -1 : 0;
249993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
250993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int indexIn(CharSequence sequence, int start) {
251993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      int length = sequence.length();
252993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      Preconditions.checkPositionIndex(start, length);
253993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return (start == length) ? -1 : start;
254993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
255993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int lastIndexIn(CharSequence sequence) {
256993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.length() - 1;
257993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
258993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matchesAllOf(CharSequence sequence) {
259993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(sequence);
260993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return true;
261993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
262993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matchesNoneOf(CharSequence sequence) {
263993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.length() == 0;
264993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
265993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String removeFrom(CharSequence sequence) {
266993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(sequence);
267993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return "";
268993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
269993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String replaceFrom(
270993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        CharSequence sequence, char replacement) {
271993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      char[] array = new char[sequence.length()];
272993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      Arrays.fill(array, replacement);
273993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return new String(array);
274993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
275993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String replaceFrom(
276993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        CharSequence sequence, CharSequence replacement) {
277993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
278993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      for (int i = 0; i < sequence.length(); i++) {
279993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        retval.append(replacement);
280993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
281993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return retval.toString();
282993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
283993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String collapseFrom(CharSequence sequence, char replacement) {
284993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return (sequence.length() == 0) ? "" : String.valueOf(replacement);
285993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
286993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String trimFrom(CharSequence sequence) {
287993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(sequence);
288993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return "";
289993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
290993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int countIn(CharSequence sequence) {
291993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.length();
292993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
293993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher and(CharMatcher other) {
294993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return checkNotNull(other);
295993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
296993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher or(CharMatcher other) {
297993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(other);
298993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return this;
299993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
300993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher negate() {
301993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return NONE;
302993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
303993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher precomputed() {
304993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return this;
305993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
306993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  };
307993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
308993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /** Matches no characters. */
309993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static final CharMatcher NONE = new CharMatcher() {
310993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
311993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return false;
312993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
313993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
314993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int indexIn(CharSequence sequence) {
315993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(sequence);
316993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return -1;
317993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
318993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int indexIn(CharSequence sequence, int start) {
319993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      int length = sequence.length();
320993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      Preconditions.checkPositionIndex(start, length);
321993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return -1;
322993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
323993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int lastIndexIn(CharSequence sequence) {
324993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(sequence);
325993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return -1;
326993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
327993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matchesAllOf(CharSequence sequence) {
328993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.length() == 0;
329993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
330993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matchesNoneOf(CharSequence sequence) {
331993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(sequence);
332993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return true;
333993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
334993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String removeFrom(CharSequence sequence) {
335993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.toString();
336993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
337993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String replaceFrom(
338993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        CharSequence sequence, char replacement) {
339993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.toString();
340993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
341993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String replaceFrom(
342993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        CharSequence sequence, CharSequence replacement) {
343993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(replacement);
344993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.toString();
345993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
346993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String collapseFrom(
347993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        CharSequence sequence, char replacement) {
348993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.toString();
349993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
350993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public String trimFrom(CharSequence sequence) {
351993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.toString();
352993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
353993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public int countIn(CharSequence sequence) {
354993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(sequence);
355993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return 0;
356993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
357993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher and(CharMatcher other) {
358993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      checkNotNull(other);
359993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return this;
360993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
361993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher or(CharMatcher other) {
362993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return checkNotNull(other);
363993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
364993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher negate() {
365993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return ANY;
366993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
367993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override protected void setBits(LookupTable table) {
368993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
369993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher precomputed() {
370993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return this;
371993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
372993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  };
373993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
374993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Static factories
375993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
376993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
377993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@code char} matcher that matches only one specified character.
378993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
379993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharMatcher is(final char match) {
380993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharMatcher() {
381993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matches(char c) {
382993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return c == match;
383993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
384993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
385993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public String replaceFrom(
386993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          CharSequence sequence, char replacement) {
387993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return sequence.toString().replace(match, replacement);
388993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
389993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher and(CharMatcher other) {
390993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return other.matches(match) ? this : NONE;
391993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
392993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher or(CharMatcher other) {
393993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return other.matches(match) ? other : super.or(other);
394993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
395993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher negate() {
396993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return isNot(match);
397993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
398993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override protected void setBits(LookupTable table) {
399993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        table.set(match);
400993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
401993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher precomputed() {
402993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return this;
403993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
404993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
405993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
406993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
407993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
408993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@code char} matcher that matches any character except the one
409993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * specified.
410993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
411993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
412993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
413993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharMatcher isNot(final char match) {
414993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharMatcher() {
415993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matches(char c) {
416993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return c != match;
417993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
418993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
419993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher and(CharMatcher other) {
420993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return other.matches(match) ? super.and(other) : other;
421993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
422993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher or(CharMatcher other) {
423993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return other.matches(match) ? ANY : this;
424993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
425993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher negate() {
426993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return is(match);
427993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
428993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
429993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
430993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
431993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
432993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@code char} matcher that matches any character present in the
433993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * given character sequence.
434993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
435993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharMatcher anyOf(final CharSequence sequence) {
436993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    switch (sequence.length()) {
437993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      case 0:
438993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return NONE;
439993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      case 1:
440993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return is(sequence.charAt(0));
441993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      case 2:
442993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        final char match1 = sequence.charAt(0);
443993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        final char match2 = sequence.charAt(1);
444993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return new CharMatcher() {
445993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          @Override public boolean matches(char c) {
446993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            return c == match1 || c == match2;
447993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          }
448993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          @Override protected void setBits(LookupTable table) {
449993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            table.set(match1);
450993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            table.set(match2);
451993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          }
452993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          @Override public CharMatcher precomputed() {
453993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            return this;
454993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          }
455993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        };
456993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
457993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
458993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    final char[] chars = sequence.toString().toCharArray();
459993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    Arrays.sort(chars); // not worth collapsing duplicates
460993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
461993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharMatcher() {
462993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matches(char c) {
463993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return Arrays.binarySearch(chars, c) >= 0;
464993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
465993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override protected void setBits(LookupTable table) {
466993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        for (char c : chars) {
467993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          table.set(c);
468993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
469993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
470993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
471993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
472993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
473993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
474993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@code char} matcher that matches any character not present in
475993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * the given character sequence.
476993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
477993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharMatcher noneOf(CharSequence sequence) {
478993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return anyOf(sequence).negate();
479993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
480993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
481993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
482993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@code char} matcher that matches any character in a given range
483993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * (both endpoints are inclusive). For example, to match any lowercase letter
484993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * of the English alphabet, use {@code CharMatcher.inRange('a', 'z')}.
485993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
486993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
487993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
488993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharMatcher inRange(
489993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      final char startInclusive, final char endInclusive) {
490993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    checkArgument(endInclusive >= startInclusive);
491993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharMatcher() {
492993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matches(char c) {
493993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return startInclusive <= c && c <= endInclusive;
494993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
495993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override protected void setBits(LookupTable table) {
496993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        char c = startInclusive;
497993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        while (true) {
498993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          table.set(c);
499993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          if (c++ == endInclusive) {
500993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira            break;
501993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          }
502993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
503993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
504993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher precomputed() {
505993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return this;
506993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
507993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
508993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
509993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
510993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
511993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a matcher with identical behavior to the given {@link
512993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Character}-based predicate, but which operates on primitive {@code char}
513993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * instances instead.
514993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
515993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public static CharMatcher forPredicate(
516993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      final Predicate<? super Character> predicate) {
517993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    checkNotNull(predicate);
518993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (predicate instanceof CharMatcher) {
519993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return (CharMatcher) predicate;
520993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
521993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharMatcher() {
522993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matches(char c) {
523993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return predicate.apply(c);
524993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
525993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean apply(Character character) {
526993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return predicate.apply(checkNotNull(character));
527993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
528993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
529993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
530993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
531993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Abstract methods
532993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
533993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /** Determines a true or false value for the given character. */
public abstract boolean matches(char c); // abstract hook; the default setBits and matchesAllOf are built on it
535993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
536993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Non-static factories
537993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
538993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
539993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a matcher that matches any character not matched by this matcher.
540993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
541993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public CharMatcher negate() {
542993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    final CharMatcher original = this;
543993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharMatcher() {
544993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matches(char c) {
545993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return !original.matches(c);
546993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
547993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
548993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matchesAllOf(CharSequence sequence) {
549993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return original.matchesNoneOf(sequence);
550993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
551993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matchesNoneOf(CharSequence sequence) {
552993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return original.matchesAllOf(sequence);
553993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
554993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public int countIn(CharSequence sequence) {
555993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return sequence.length() - original.countIn(sequence);
556993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
557993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher negate() {
558993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return original;
559993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
560993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
561993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
562993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
563993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
564993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a matcher that matches any character matched by both this matcher
565993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * and {@code other}.
566993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
567993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public CharMatcher and(CharMatcher other) {
568993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new And(Arrays.asList(this, checkNotNull(other)));
569993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
570993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
571993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static class And extends CharMatcher {
572993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    List<CharMatcher> components;
573993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
574993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    And(List<CharMatcher> components) {
575993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.components = components; // Skip defensive copy (private)
576993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
577993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
578993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
579993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      for (CharMatcher matcher : components) {
580993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (!matcher.matches(c)) {
581993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          return false;
582993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
583993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
584993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return true;
585993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
586993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
587993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher and(CharMatcher other) {
588993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
589993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      newComponents.add(checkNotNull(other));
590993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return new And(newComponents);
591993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
592993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
593993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
594993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
595993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a matcher that matches any character matched by either this matcher
596993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * or {@code other}.
597993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
598993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public CharMatcher or(CharMatcher other) {
599993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new Or(Arrays.asList(this, checkNotNull(other)));
600993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
601993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
602993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  private static class Or extends CharMatcher {
603993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    List<CharMatcher> components;
604993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
605993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    Or(List<CharMatcher> components) {
606993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      this.components = components; // Skip defensive copy (private)
607993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
608993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
609993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public boolean matches(char c) {
610993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      for (CharMatcher matcher : components) {
611993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (matcher.matches(c)) {
612993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          return true;
613993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
614993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
615993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return false;
616993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
617993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
618993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override public CharMatcher or(CharMatcher other) {
619993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
620993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      newComponents.add(checkNotNull(other));
621993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return new Or(newComponents);
622993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
623993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
624993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    @Override protected void setBits(LookupTable table) {
625993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      for (CharMatcher matcher : components) {
626993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        matcher.setBits(table);
627993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
628993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
629993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
630993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
631993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
632993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a {@code char} matcher functionally equivalent to this one, but
633993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * which may be faster to query than the original; your mileage may vary.
634993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Precomputation takes time and is likely to be worthwhile only if the
635993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * precomputed matcher is queried many thousands of times.
636993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
637993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>This method has no effect (returns {@code this}) when called in GWT:
638993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * it's unclear whether a precomputed matcher is faster, but it certainly
639993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * consumes more memory, which doesn't seem like a worthwhile tradeoff in a
640993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * browser.
641993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
642993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public CharMatcher precomputed() {
643993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return Platform.precomputeCharMatcher(this);
644993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
645993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
646993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
647993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * This is the actual implementation of {@link #precomputed}, but we bounce
648993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * calls through a method on {@link Platform} so that we can have different
649993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * behavior in GWT.
650993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
651993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default precomputation is to cache the configuration of the original
652993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * matcher in an eight-kilobyte bit array. In some situations this produces a
653993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * matcher which is faster to query than the original.
654993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
655993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation creates a new bit array and passes it to
656993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * {@link #setBits(LookupTable)}.
657993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
658993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  CharMatcher precomputedInternal() {
659993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    final LookupTable table = new LookupTable();
660993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    setBits(table);
661993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
662993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new CharMatcher() {
663993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public boolean matches(char c) {
664993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return table.get(c);
665993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
666993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
667993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      // TODO: make methods like negate() smart
668993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
669993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      @Override public CharMatcher precomputed() {
670993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return this;
671993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
672993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    };
673993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
674993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
675993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
676993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * For use by implementors; sets the bit corresponding to each character ('\0'
677993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * to '{@literal \}uFFFF') that matches this matcher in the given bit array,
678993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * leaving all other bits untouched.
679993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
680993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation loops over every possible character value,
681993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * invoking {@link #matches} for each one.
682993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
683993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  protected void setBits(LookupTable table) {
684993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    char c = Character.MIN_VALUE;
685993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    while (true) {
686993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (matches(c)) {
687993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        table.set(c);
688993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
689993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (c++ == Character.MAX_VALUE) {
690993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        break;
691993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
692993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
693993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
694993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
695993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
696993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * A bit array with one bit per {@code char} value, used by {@link
697993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * CharMatcher#precomputed}.
698993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
699993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>TODO: possibly share a common BitArray class with BloomFilter
700993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * and others... a simpler java.util.BitSet.
701993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
702993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  protected static class LookupTable {
703993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int[] data = new int[2048];
704993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
705993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    void set(char index) {
706993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      data[index >> 5] |= (1 << index);
707993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
708993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    boolean get(char index) {
709993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return (data[index >> 5] & (1 << index)) != 0;
710993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
711993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
712993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
713993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Text processing routines
714993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
715993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
716993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns {@code true} if a character sequence contains only matching
717993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters.
718993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
719993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation iterates over the sequence, invoking {@link
720993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * #matches} for each character, until this returns {@code false} or the end
721993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * is reached.
722993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
723993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to examine, possibly empty
724993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return {@code true} if this matcher matches every character in the
725993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     sequence, including when the sequence is empty
726993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
727993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public boolean matchesAllOf(CharSequence sequence) {
728993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = sequence.length() - 1; i >= 0; i--) {
729993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (!matches(sequence.charAt(i))) {
730993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return false;
731993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
732993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
733993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return true;
734993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
735993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
736993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
737993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns {@code true} if a character sequence contains no matching
738993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters.
739993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
740993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation iterates over the sequence, invoking {@link
741993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * #matches} for each character, until this returns {@code false} or the end is
742993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * reached.
743993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
744993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to examine, possibly empty
745993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return {@code true} if this matcher matches every character in the
746993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     sequence, including when the sequence is empty
747993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
748993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public boolean matchesNoneOf(CharSequence sequence) {
749993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return indexIn(sequence) == -1;
750993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
751993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
752993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // TODO: perhaps add matchesAnyOf()
753993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
754993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
755993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns the index of the first matching character in a character sequence,
756993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * or {@code -1} if no matching character is present.
757993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
758993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation iterates over the sequence in forward order
759993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * calling {@link #matches} for each character.
760993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
761993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to examine from the beginning
762993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return an index, or {@code -1} if no character matches
763993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
764993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public int indexIn(CharSequence sequence) {
765993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int length = sequence.length();
766993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = 0; i < length; i++) {
767993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (matches(sequence.charAt(i))) {
768993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return i;
769993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
770993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
771993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return -1;
772993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
773993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
774993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
775993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns the index of the first matching character in a character sequence,
776993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * starting from a given position, or {@code -1} if no character matches after
777993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * that position.
778993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
779993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation iterates over the sequence in forward order,
780993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * beginning at {@code start}, calling {@link #matches} for each character.
781993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
782993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to examine
783993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param start the first index to examine; must be nonnegative and no
784993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     greater than {@code sequence.length()}
785993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return the index of the first matching character, guaranteed to be no less
786993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     than {@code start}, or {@code -1} if no character matches
787993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @throws IndexOutOfBoundsException if start is negative or greater than
788993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     {@code sequence.length()}
789993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
790993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public int indexIn(CharSequence sequence, int start) {
791993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int length = sequence.length();
792993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    Preconditions.checkPositionIndex(start, length);
793993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = start; i < length; i++) {
794993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (matches(sequence.charAt(i))) {
795993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return i;
796993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
797993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
798993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return -1;
799993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
800993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
801993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
802993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns the index of the last matching character in a character sequence,
803993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * or {@code -1} if no matching character is present.
804993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
805993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation iterates over the sequence in reverse order
806993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * calling {@link #matches} for each character.
807993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
808993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to examine from the end
809993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return an index, or {@code -1} if no character matches
810993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
811993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public int lastIndexIn(CharSequence sequence) {
812993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = sequence.length() - 1; i >= 0; i--) {
813993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (matches(sequence.charAt(i))) {
814993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        return i;
815993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
816993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
817993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return -1;
818993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
819993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
820993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
821993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns the number of matching characters found in a character sequence.
822993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
823993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public int countIn(CharSequence sequence) {
824993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int count = 0;
825993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = 0; i < sequence.length(); i++) {
826993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (matches(sequence.charAt(i))) {
827993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        count++;
828993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
829993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
830993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return count;
831993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
832993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
833993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
834993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a string containing all non-matching characters of a character
835993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * sequence, in order. For example: <pre>   {@code
836993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
837993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
838993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
839993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "bzr"}.
840993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
841993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String removeFrom(CharSequence sequence) {
842993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    String string = sequence.toString();
843993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int pos = indexIn(string);
844993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (pos == -1) {
845993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return string;
846993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
847993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
848993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    char[] chars = string.toCharArray();
849993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int spread = 1;
850993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
851993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    // This unusual loop comes from extensive benchmarking
852993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    OUT:
853993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    while (true) {
854993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      pos++;
855993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      while (true) {
856993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (pos == chars.length) {
857993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          break OUT;
858993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
859993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (matches(chars[pos])) {
860993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          break;
861993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
862993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        chars[pos - spread] = chars[pos];
863993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        pos++;
864993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
865993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      spread++;
866993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
867993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new String(chars, 0, pos - spread);
868993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
869993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
870993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
871993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a string containing all matching characters of a character
872993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * sequence, in order. For example: <pre>   {@code
873993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
874993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
875993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
876993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "aaa"}.
877993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
878993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String retainFrom(CharSequence sequence) {
879993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return negate().removeFrom(sequence);
880993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
881993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
882993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
883993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a string copy of the input character sequence, with each character
884993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * that matches this matcher replaced by a given replacement character. For
885993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * example: <pre>   {@code
886993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
887993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
888993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
889993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "rodor"}.
890993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
891993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
892993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * the first matching character, then iterates the remainder of the sequence
893993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * calling {@link #matches(char)} for each character.
894993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
895993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to replace matching characters in
896993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param replacement the character to append to the result string in place of
897993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     each matching character in {@code sequence}
898993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return the new string
899993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
900993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String replaceFrom(CharSequence sequence, char replacement) {
901993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    String string = sequence.toString();
902993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int pos = indexIn(string);
903993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (pos == -1) {
904993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return string;
905993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
906993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    char[] chars = string.toCharArray();
907993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    chars[pos] = replacement;
908993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = pos + 1; i < chars.length; i++) {
909993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (matches(chars[i])) {
910993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        chars[i] = replacement;
911993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
912993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
913993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return new String(chars);
914993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
915993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
916993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
917993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a string copy of the input character sequence, with each character
918993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * that matches this matcher replaced by a given replacement sequence. For
919993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * example: <pre>   {@code
920993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
921993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
922993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
923993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "yoohoo"}.
924993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
925993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p><b>Note:</b> If the replacement is a fixed string with only one character,
926993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * you are better off calling {@link #replaceFrom(CharSequence, char)} directly.
927993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
928993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to replace matching characters in
929993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param replacement the characters to append to the result string in place
930993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     of each matching character in {@code sequence}
931993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return the new string
932993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
933993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
934993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int replacementLen = replacement.length();
935993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (replacementLen == 0) {
936993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return removeFrom(sequence);
937993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
938993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (replacementLen == 1) {
939993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return replaceFrom(sequence, replacement.charAt(0));
940993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
941993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
942993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    String string = sequence.toString();
943993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int pos = indexIn(string);
944993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (pos == -1) {
945993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return string;
946993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
947993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
948993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int len = string.length();
949993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    StringBuilder buf = new StringBuilder((int) (len * 1.5) + 16);
950993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
951993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int oldpos = 0;
952993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    do {
953993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      buf.append(string, oldpos, pos);
954993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      buf.append(replacement);
955993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      oldpos = pos + 1;
956993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      pos = indexIn(string, oldpos);
957993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    } while (pos != -1);
958993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
959993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    buf.append(string, oldpos, len);
960993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return buf.toString();
961993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
962993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
963993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
964993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a substring of the input character sequence that omits all
965993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters this matcher matches from the beginning and from the end of the
966993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string. For example: <pre> {@code
967993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
968993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
969993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
970993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "cat"}.
971993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
972993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>Note that<pre>   {@code
973993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
974993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
975993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
976993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... is equivalent to {@link String#trim()}.
977993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
978993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String trimFrom(CharSequence sequence) {
979993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int len = sequence.length();
980993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int first;
981993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int last;
982993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
983993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (first = 0; first < len; first++) {
984993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (!matches(sequence.charAt(first))) {
985993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        break;
986993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
987993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
988993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (last = len - 1; last > first; last--) {
989993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (!matches(sequence.charAt(last))) {
990993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        break;
991993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
992993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
993993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
994993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return sequence.subSequence(first, last + 1).toString();
995993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
996993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
997993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
998993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a substring of the input character sequence that omits all
999993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters this matcher matches from the beginning of the
1000993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string. For example: <pre> {@code
1001993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1002993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1003993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1004993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "catbab"}.
1005993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
1006993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String trimLeadingFrom(CharSequence sequence) {
1007993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int len = sequence.length();
1008993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int first;
1009993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1010993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (first = 0; first < len; first++) {
1011993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (!matches(sequence.charAt(first))) {
1012993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        break;
1013993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1014993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1015993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1016993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return sequence.subSequence(first, len).toString();
1017993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1018993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1019993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
1020993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a substring of the input character sequence that omits all
1021993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * characters this matcher matches from the end of the
1022993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * string. For example: <pre> {@code
1023993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1024993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1025993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1026993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "abacat"}.
1027993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
1028993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String trimTrailingFrom(CharSequence sequence) {
1029993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int len = sequence.length();
1030993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int last;
1031993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1032993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (last = len - 1; last >= 0; last--) {
1033993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (!matches(sequence.charAt(last))) {
1034993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        break;
1035993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1036993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1037993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1038993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return sequence.subSequence(0, last + 1).toString();
1039993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1040993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1041993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
1042993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns a string copy of the input character sequence, with each group of
1043993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * consecutive characters that match this matcher replaced by a single
1044993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * replacement character. For example: <pre>   {@code
1045993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1046993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1047993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1048993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * ... returns {@code "b-p-r"}.
1049993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1050993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
1051993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * the first matching character, then iterates the remainder of the sequence
1052993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * calling {@link #matches(char)} for each character.
1053993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1054993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param sequence the character sequence to replace matching groups of
1055993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     characters in
1056993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @param replacement the character to append to the result string in place of
1057993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *     each group of matching characters in {@code sequence}
1058993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @return the new string
1059993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
1060993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String collapseFrom(CharSequence sequence, char replacement) {
1061993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int first = indexIn(sequence);
1062993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (first == -1) {
1063993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return sequence.toString();
1064993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1065993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1066993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    // TODO: this implementation can probably be made faster.
1067993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1068993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    StringBuilder builder = new StringBuilder(sequence.length())
1069993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        .append(sequence.subSequence(0, first))
1070993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        .append(replacement);
1071993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    boolean in = true;
1072993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = first + 1; i < sequence.length(); i++) {
1073993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      char c = sequence.charAt(i);
1074993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (apply(c)) {
1075993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (!in) {
1076993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          builder.append(replacement);
1077993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          in = true;
1078993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
1079993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      } else {
1080993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        builder.append(c);
1081993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        in = false;
1082993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1083993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1084993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return builder.toString();
1085993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1086993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1087993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
1088993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Collapses groups of matching characters exactly as {@link #collapseFrom}
1089993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * does, except that groups of matching characters at the start or end of the
1090993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * sequence are removed without replacement.
1091993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
1092993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1093993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    int first = negate().indexIn(sequence);
1094993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    if (first == -1) {
1095993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      return ""; // everything matches. nothing's left.
1096993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1097993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    StringBuilder builder = new StringBuilder(sequence.length());
1098993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    boolean inMatchingGroup = false;
1099993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    for (int i = first; i < sequence.length(); i++) {
1100993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      char c = sequence.charAt(i);
1101993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      if (apply(c)) {
1102993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        inMatchingGroup = true;
1103993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      } else {
1104993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        if (inMatchingGroup) {
1105993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          builder.append(replacement);
1106993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira          inMatchingGroup = false;
1107993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        }
1108993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira        builder.append(c);
1109993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira      }
1110993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    }
1111993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return builder.toString();
1112993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1113993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1114993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  // Predicate interface
1115993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira
1116993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /**
1117993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * Returns {@code true} if this matcher matches the given character.
1118993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   *
1119993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   * @throws NullPointerException if {@code character} is null
1120993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira   */
1121993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  /*@Override*/ public boolean apply(Character character) {
1122993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira    return matches(character);
1123993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira  }
1124993ef2674bf860a84c5c17e51a7a9e13e5d56504Mindy Pereira}