/*
 * Copyright (C) 2008 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
17bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorpackage com.google.common.base;
18bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
19bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport static com.google.common.base.Preconditions.checkArgument;
20bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport static com.google.common.base.Preconditions.checkNotNull;
21bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.annotations.Beta;
231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.annotations.GwtCompatible;
241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
25bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport java.util.ArrayList;
26bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport java.util.Arrays;
27bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport java.util.List;
28bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport javax.annotation.CheckReturnValue;
301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
/**
 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
 * for any {@link Object}. Also offers basic text processing methods based on this function.
 * Implementations are strongly encouraged to be side-effect-free and immutable.
 *
 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
 * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
 *
 * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
 * treats these just as two separate characters.
 *
 * <p>Example usages: <pre>
 *   String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput);
 *   if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
 *
 * @author Kevin Bourrillion
 * @since 1.0
 */
511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert@Beta // Possibly change from chars to code points; decide constants vs. methods
52bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor@GwtCompatible
53bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorpublic abstract class CharMatcher implements Predicate<Character> {
54bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Constants
55bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
56bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Excludes 2000-2000a, which is handled as a range
57bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private static final String BREAKING_WHITESPACE_CHARS =
58bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
59bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
60bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Excludes 2007, which is handled as a gap in a pair of ranges
61bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private static final String NON_BREAKING_WHITESPACE_CHARS =
62bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      "\u00a0\u180e\u202f";
63bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
64bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is whitespace according to the latest Unicode standard, as
661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * illustrated
67bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * This is not the same definition used by other Java APIs. (See a
691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * definitions of "whitespace"</a>.)
71bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * to date.
74bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
75bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher WHITESPACE =
76bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          .or(inRange('\u2000', '\u200a'))
781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          .precomputed();
79bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
80bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * discussion of that term.
84bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @since 2.0
86bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
87bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher BREAKING_WHITESPACE =
88bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      anyOf(BREAKING_WHITESPACE_CHARS)
89bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          .or(inRange('\u2000', '\u2006'))
901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          .or(inRange('\u2008', '\u200a'))
911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          .precomputed();
92bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
93bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is ASCII, meaning that its code point is less than 128.
95bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
96bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher ASCII = inRange('\0', '\u007f');
97bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
98bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
99bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Determines whether a character is a digit according to
100bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
101bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
102bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher DIGIT;
103bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
104bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  static {
105bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    CharMatcher digit = inRange('0', '9');
106bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    String zeroes =
107bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
108bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
109bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
110bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (char base : zeroes.toCharArray()) {
111bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      digit = digit.or(inRange(base, (char) (base + 9)));
112bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    DIGIT = digit.precomputed();
114bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
115bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
116bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
1181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
119bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
120bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
121bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public boolean matches(char c) {
122bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return Character.isDigit(c);
123bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
124bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  };
125bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
126bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
1281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
129bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * inRange('a', 'z').or(inRange('A', 'Z'))}.
130bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
131bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher JAVA_LETTER = new CharMatcher() {
132bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public boolean matches(char c) {
133bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return Character.isLetter(c);
134bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
135bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  };
136bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
137bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
138bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Determines whether a character is a letter or digit according to {@link
139bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Character#isLetterOrDigit(char) Java's definition}.
140bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
141bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
142bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public boolean matches(char c) {
143bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return Character.isLetterOrDigit(c);
144bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
145bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  };
146bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
147bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
1491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Java's definition}.
150bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
151bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
152bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public boolean matches(char c) {
153bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return Character.isUpperCase(c);
154bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
155bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  };
156bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
157bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
1591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Java's definition}.
160bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
161bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
162bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public boolean matches(char c) {
163bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return Character.isLowerCase(c);
164bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
165bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  };
166bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
167bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is an ISO control character as specified by {@link
1691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Character#isISOControl(char)}.
170bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
1711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public static final CharMatcher JAVA_ISO_CONTROL =
1721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
173bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
174bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is invisible; that is, if its Unicode category is any of
1761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
1771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * PRIVATE_USE according to ICU4J.
178bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
179bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
180bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u007f', '\u00a0'))
181bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(is('\u00ad'))
182bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u0600', '\u0603'))
183bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
184bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u2000', '\u200f'))
185bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u2028', '\u202f'))
186bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u205f', '\u2064'))
187bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u206a', '\u206f'))
188bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(is('\u3000'))
189bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\ud800', '\uf8ff'))
1901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
1911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      .precomputed();
192bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
193bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
1951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * errs on the side of returning {@code false} (that is, it tends to assume a character is
1961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * double-width).
197bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
1991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * date.
200bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
201bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
202bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(is('\u05be'))
203bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u05d0', '\u05ea'))
204bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(is('\u05f3'))
205bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(is('\u05f4'))
206bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u0600', '\u06ff'))
207bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u0750', '\u077f'))
208bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u0e00', '\u0e7f'))
209bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u1e00', '\u20af'))
210bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\u2100', '\u213a'))
211bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\ufb50', '\ufdff'))
212bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      .or(inRange('\ufe70', '\ufeff'))
2131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      .or(inRange('\uff61', '\uffdc'))
2141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      .precomputed();
215bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
216bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /** Matches any character. */
2171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public static final CharMatcher ANY =
2181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      new CharMatcher() {
2191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public boolean matches(char c) {
2201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return true;
2211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
222bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
2231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int indexIn(CharSequence sequence) {
2241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return (sequence.length() == 0) ? -1 : 0;
2251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int indexIn(CharSequence sequence, int start) {
2281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          int length = sequence.length();
2291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          Preconditions.checkPositionIndex(start, length);
2301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return (start == length) ? -1 : start;
2311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int lastIndexIn(CharSequence sequence) {
2341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.length() - 1;
2351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public boolean matchesAllOf(CharSequence sequence) {
2381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(sequence);
2391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return true;
2401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public boolean matchesNoneOf(CharSequence sequence) {
2431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.length() == 0;
2441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String removeFrom(CharSequence sequence) {
2471d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(sequence);
2481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return "";
2491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String replaceFrom(CharSequence sequence, char replacement) {
2521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          char[] array = new char[sequence.length()];
2531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          Arrays.fill(array, replacement);
2541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return new String(array);
2551d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
2581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
2591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          for (int i = 0; i < sequence.length(); i++) {
2601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert            retval.append(replacement);
2611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          }
2621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return retval.toString();
2631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String collapseFrom(CharSequence sequence, char replacement) {
2661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return (sequence.length() == 0) ? "" : String.valueOf(replacement);
2671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String trimFrom(CharSequence sequence) {
2701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(sequence);
2711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return "";
2721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int countIn(CharSequence sequence) {
2751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.length();
2761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher and(CharMatcher other) {
2791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return checkNotNull(other);
2801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher or(CharMatcher other) {
2831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(other);
2841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return this;
2851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher negate() {
2881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return NONE;
2891d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
2911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher precomputed() {
2921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return this;
2931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
2941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      };
295bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
296bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /** Matches no characters. */
2971d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public static final CharMatcher NONE =
2981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      new CharMatcher() {
2991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public boolean matches(char c) {
3001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return false;
3011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
302bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
3031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int indexIn(CharSequence sequence) {
3041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(sequence);
3051d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return -1;
3061d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int indexIn(CharSequence sequence, int start) {
3091d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          int length = sequence.length();
3101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          Preconditions.checkPositionIndex(start, length);
3111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return -1;
3121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int lastIndexIn(CharSequence sequence) {
3151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(sequence);
3161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return -1;
3171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public boolean matchesAllOf(CharSequence sequence) {
3201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.length() == 0;
3211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public boolean matchesNoneOf(CharSequence sequence) {
3241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(sequence);
3251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return true;
3261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String removeFrom(CharSequence sequence) {
3291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.toString();
3301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String replaceFrom(CharSequence sequence, char replacement) {
3331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.toString();
3341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
3371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(replacement);
3381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.toString();
3391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String collapseFrom(CharSequence sequence, char replacement) {
3421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.toString();
3431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public String trimFrom(CharSequence sequence) {
3461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return sequence.toString();
3471d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public int countIn(CharSequence sequence) {
3501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(sequence);
3511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return 0;
3521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher and(CharMatcher other) {
3551d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          checkNotNull(other);
3561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return this;
3571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher or(CharMatcher other) {
3601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return checkNotNull(other);
3611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher negate() {
3641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return ANY;
3651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override void setBits(LookupTable table) {}
3681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
3691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        @Override public CharMatcher precomputed() {
3701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return this;
3711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
3721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      };
373bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
374bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Static factories
375bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
376bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
377bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a {@code char} matcher that matches only one specified character.
378bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
379bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static CharMatcher is(final char match) {
380bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new CharMatcher() {
381bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matches(char c) {
382bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return c == match;
383bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
384bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
3851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      @Override public String replaceFrom(CharSequence sequence, char replacement) {
386bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return sequence.toString().replace(match, replacement);
387bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
3881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
389bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher and(CharMatcher other) {
390bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return other.matches(match) ? this : NONE;
391bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
3921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
393bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher or(CharMatcher other) {
394bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return other.matches(match) ? other : super.or(other);
395bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
3961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
397bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher negate() {
398bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return isNot(match);
399bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
4001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      @Override void setBits(LookupTable table) {
402bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        table.set(match);
403bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
4041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
405bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher precomputed() {
406bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return this;
407bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
408bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
409bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
410bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
411bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
4121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a {@code char} matcher that matches any character except the one specified.
413bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
414bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
415bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
416bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static CharMatcher isNot(final char match) {
417bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new CharMatcher() {
418bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matches(char c) {
419bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return c != match;
420bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
421bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
422bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher and(CharMatcher other) {
423bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return other.matches(match) ? super.and(other) : other;
424bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
4251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
426bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher or(CharMatcher other) {
427bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return other.matches(match) ? ANY : this;
428bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
4291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
430bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher negate() {
431bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return is(match);
432bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
433bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
434bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
435bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
436bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
4371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a {@code char} matcher that matches any character present in the given character
4381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * sequence.
439bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
440bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static CharMatcher anyOf(final CharSequence sequence) {
441bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    switch (sequence.length()) {
442bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      case 0:
443bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return NONE;
444bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      case 1:
445bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return is(sequence.charAt(0));
446bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      case 2:
447bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        final char match1 = sequence.charAt(0);
448bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        final char match2 = sequence.charAt(1);
449bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return new CharMatcher() {
450bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          @Override public boolean matches(char c) {
451bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return c == match1 || c == match2;
452bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
4531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          @Override void setBits(LookupTable table) {
455bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            table.set(match1);
456bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            table.set(match2);
457bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
4581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
459bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          @Override public CharMatcher precomputed() {
460bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            return this;
461bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
462bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        };
463bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
464bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
465bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    final char[] chars = sequence.toString().toCharArray();
466bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    Arrays.sort(chars); // not worth collapsing duplicates
467bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
468bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new CharMatcher() {
469bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matches(char c) {
470bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return Arrays.binarySearch(chars, c) >= 0;
471bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
4721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
4731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      @Override void setBits(LookupTable table) {
474bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        for (char c : chars) {
475bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          table.set(c);
476bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
477bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
478bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
479bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
480bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
481bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
4821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a {@code char} matcher that matches any character not present in the given character
4831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * sequence.
484bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
485bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public static CharMatcher noneOf(CharSequence sequence) {
486bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return anyOf(sequence).negate();
487bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
488bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
489bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
4901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
4911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
4921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * CharMatcher.inRange('a', 'z')}.
493bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
494bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
495bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
4961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
497bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkArgument(endInclusive >= startInclusive);
498bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new CharMatcher() {
499bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matches(char c) {
500bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return startInclusive <= c && c <= endInclusive;
501bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
5021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
5031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      @Override void setBits(LookupTable table) {
504bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        char c = startInclusive;
505bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        while (true) {
506bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          table.set(c);
507bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          if (c++ == endInclusive) {
508bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor            break;
509bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          }
510bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
511bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
5121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
513bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher precomputed() {
514bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return this;
515bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
516bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
517bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
518bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
519bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
5201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
5211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * which operates on primitive {@code char} instances instead.
522bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
5231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
524bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    checkNotNull(predicate);
525bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (predicate instanceof CharMatcher) {
526bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return (CharMatcher) predicate;
527bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
528bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new CharMatcher() {
529bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matches(char c) {
530bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return predicate.apply(c);
531bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
5321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
533bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean apply(Character character) {
534bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return predicate.apply(checkNotNull(character));
535bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
536bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
537bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
538bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
5391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  // Constructors
5401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
5411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
5421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Constructor for use by subclasses.
5431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
5441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  protected CharMatcher() {}
5451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
546bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Abstract methods
547bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
548bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /** Determines a true or false value for the given character. */
549bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public abstract boolean matches(char c);
550bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
551bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Non-static factories
552bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
553bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
554bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns a matcher that matches any character not matched by this matcher.
555bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
556bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public CharMatcher negate() {
557bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    final CharMatcher original = this;
558bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new CharMatcher() {
559bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matches(char c) {
560bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return !original.matches(c);
561bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
562bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
563bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matchesAllOf(CharSequence sequence) {
564bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return original.matchesNoneOf(sequence);
565bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
5661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
567bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matchesNoneOf(CharSequence sequence) {
568bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return original.matchesAllOf(sequence);
569bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
5701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
571bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public int countIn(CharSequence sequence) {
572bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return sequence.length() - original.countIn(sequence);
573bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
5741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
575bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher negate() {
576bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return original;
577bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
578bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
579bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
580bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
581bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
5821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
583bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
584bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public CharMatcher and(CharMatcher other) {
585bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new And(Arrays.asList(this, checkNotNull(other)));
586bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
587bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
588bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private static class And extends CharMatcher {
589bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    List<CharMatcher> components;
590bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
591bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    And(List<CharMatcher> components) {
592bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      this.components = components; // Skip defensive copy (private)
593bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
594bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
595bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public boolean matches(char c) {
596bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      for (CharMatcher matcher : components) {
597bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (!matcher.matches(c)) {
598bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          return false;
599bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
600bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
601bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return true;
602bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
603bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
604bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public CharMatcher and(CharMatcher other) {
605bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
606bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      newComponents.add(checkNotNull(other));
607bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return new And(newComponents);
608bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
609bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
610bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
611bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
6121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
613bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
614bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public CharMatcher or(CharMatcher other) {
615bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new Or(Arrays.asList(this, checkNotNull(other)));
616bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
617bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
618bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  private static class Or extends CharMatcher {
619bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    List<CharMatcher> components;
620bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
621bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    Or(List<CharMatcher> components) {
622bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      this.components = components; // Skip defensive copy (private)
623bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
624bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
625bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public boolean matches(char c) {
626bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      for (CharMatcher matcher : components) {
627bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (matcher.matches(c)) {
628bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          return true;
629bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
630bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
631bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return false;
632bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
633bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
634bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    @Override public CharMatcher or(CharMatcher other) {
635bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
636bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      newComponents.add(checkNotNull(other));
637bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return new Or(newComponents);
638bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
639bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
6401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    @Override void setBits(LookupTable table) {
641bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      for (CharMatcher matcher : components) {
642bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        matcher.setBits(table);
643bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
644bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
645bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
646bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
647bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
6481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
6491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
6501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * worthwhile only if the precomputed matcher is queried many thousands of times.
651bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
6521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
6531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
6541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * worthwhile tradeoff in a browser.
655bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
656bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public CharMatcher precomputed() {
657bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return Platform.precomputeCharMatcher(this);
658bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
659bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
660bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
6611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
6621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * on {@link Platform} so that we can have different behavior in GWT.
663bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
6641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default precomputation is to cache the configuration of the original matcher in an
6651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
6661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * than the original.
667bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
6681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation creates a new bit array and passes it to {@link
6691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * #setBits(LookupTable)}.
670bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
671bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  CharMatcher precomputedInternal() {
672bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    final LookupTable table = new LookupTable();
673bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    setBits(table);
674bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
675bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new CharMatcher() {
676bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public boolean matches(char c) {
677bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return table.get(c);
678bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
679bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
6801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      // TODO(kevinb): make methods like negate() smart?
681bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
682bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      @Override public CharMatcher precomputed() {
683bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return this;
684bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
685bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    };
686bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
687bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
688bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
6891d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
6901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
691bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
6921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation loops over every possible character value, invoking {@link
6931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * #matches} for each one.
694bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
6951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  void setBits(LookupTable table) {
696bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    char c = Character.MIN_VALUE;
697bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    while (true) {
698bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (matches(c)) {
699bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        table.set(c);
700bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
701bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (c++ == Character.MAX_VALUE) {
702bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        break;
703bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
704bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
705bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
706bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
707bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
7081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
709bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
7101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
7111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * simpler java.util.BitSet.
712bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
7131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  private static final class LookupTable {
714bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int[] data = new int[2048];
715bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
716bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    void set(char index) {
717bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      data[index >> 5] |= (1 << index);
718bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
7191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
720bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    boolean get(char index) {
721bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return (data[index >> 5] & (1 << index)) != 0;
722bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
723bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
724bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
725bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Text processing routines
726bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
727bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
7281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns {@code true} if a character sequence contains at least one matching character.
7291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Equivalent to {@code !matchesNoneOf(sequence)}.
7301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *
7311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
7321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * character, until this returns {@code true} or the end is reached.
7331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *
7341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @param sequence the character sequence to examine, possibly empty
7351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @return {@code true} if this matcher matches at least one character in the sequence
7361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @since 8.0
7371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
7381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public boolean matchesAnyOf(CharSequence sequence) {
7391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    return !matchesNoneOf(sequence);
7401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
7411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
7421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
7431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns {@code true} if a character sequence contains only matching characters.
744bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
7451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
7461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * character, until this returns {@code false} or the end is reached.
747bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
748bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the character sequence to examine, possibly empty
7491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @return {@code true} if this matcher matches every character in the sequence, including when
7501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *         the sequence is empty
751bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
752bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public boolean matchesAllOf(CharSequence sequence) {
753bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = sequence.length() - 1; i >= 0; i--) {
754bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (!matches(sequence.charAt(i))) {
755bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return false;
756bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
757bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
758bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return true;
759bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
760bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
761bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
7621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
7631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * {@code !matchesAnyOf(sequence)}.
764bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
7651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
7661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * character, until this returns {@code false} or the end is reached.
767bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
768bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the character sequence to examine, possibly empty
7691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @return {@code true} if this matcher matches every character in the sequence, including when
7701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *         the sequence is empty
771bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
772bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public boolean matchesNoneOf(CharSequence sequence) {
773bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return indexIn(sequence) == -1;
774bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
775bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
// Note: matchesAnyOf() is implemented above; the earlier TODO asking for it is resolved.
777bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
778bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
7791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns the index of the first matching character in a character sequence, or {@code -1} if no
7801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * matching character is present.
781bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
7821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation iterates over the sequence in forward order calling {@link
7831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * #matches} for each character.
784bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
785bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the character sequence to examine from the beginning
786bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return an index, or {@code -1} if no character matches
787bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
788bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public int indexIn(CharSequence sequence) {
789bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int length = sequence.length();
790bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = 0; i < length; i++) {
791bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (matches(sequence.charAt(i))) {
792bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return i;
793bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
794bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
795bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return -1;
796bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
797bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
798bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
7991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns the index of the first matching character in a character sequence, starting from a
8001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * given position, or {@code -1} if no character matches after that position.
801bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
8021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
8031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * start}, calling {@link #matches} for each character.
804bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
805bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the character sequence to examine
8061d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @param start the first index to examine; must be nonnegative and no greater than {@code
8071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *        sequence.length()}
8081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @return the index of the first matching character, guaranteed to be no less than {@code start},
8091d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *         or {@code -1} if no character matches
8101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
8111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *         sequence.length()}
812bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
813bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public int indexIn(CharSequence sequence, int start) {
814bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int length = sequence.length();
815bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    Preconditions.checkPositionIndex(start, length);
816bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = start; i < length; i++) {
817bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (matches(sequence.charAt(i))) {
818bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return i;
819bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
820bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
821bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return -1;
822bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
823bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
824bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
8251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns the index of the last matching character in a character sequence, or {@code -1} if no
8261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * matching character is present.
827bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
8281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation iterates over the sequence in reverse order calling {@link
8291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * #matches} for each character.
830bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
831bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the character sequence to examine from the end
832bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return an index, or {@code -1} if no character matches
833bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
834bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public int lastIndexIn(CharSequence sequence) {
835bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = sequence.length() - 1; i >= 0; i--) {
836bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (matches(sequence.charAt(i))) {
837bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        return i;
838bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
839bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
840bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return -1;
841bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
842bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
843bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
844bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns the number of matching characters found in a character sequence.
845bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
846bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public int countIn(CharSequence sequence) {
847bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int count = 0;
848bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = 0; i < sequence.length(); i++) {
849bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (matches(sequence.charAt(i))) {
850bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        count++;
851bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
852bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
853bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return count;
854bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
855bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
856bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
8571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a string containing all non-matching characters of a character sequence, in order. For
8581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * example: <pre>   {@code
859bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
860bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
861bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
862bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "bzr"}.
863bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
8641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
865bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String removeFrom(CharSequence sequence) {
866bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    String string = sequence.toString();
867bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int pos = indexIn(string);
868bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (pos == -1) {
869bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return string;
870bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
871bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
872bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    char[] chars = string.toCharArray();
873bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int spread = 1;
874bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
875bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    // This unusual loop comes from extensive benchmarking
8761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    OUT: while (true) {
877bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      pos++;
878bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      while (true) {
879bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (pos == chars.length) {
880bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          break OUT;
881bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
882bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (matches(chars[pos])) {
883bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          break;
884bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
885bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        chars[pos - spread] = chars[pos];
886bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        pos++;
887bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
888bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      spread++;
889bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
890bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new String(chars, 0, pos - spread);
891bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
892bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
893bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
8941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a string containing all matching characters of a character sequence, in order. For
8951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * example: <pre>   {@code
896bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
897bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
898bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
899bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "aaa"}.
900bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
9011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
902bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String retainFrom(CharSequence sequence) {
903bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return negate().removeFrom(sequence);
904bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
905bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
906bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
9071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a string copy of the input character sequence, with each character that matches this
9081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * matcher replaced by a given replacement character. For example: <pre>   {@code
909bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
910bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
911bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
912bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "rodor"}.
913bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
9141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
9151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
9161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * character.
917bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
918bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the character sequence to replace matching characters in
9191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @param replacement the character to append to the result string in place of each matching
9201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *        character in {@code sequence}
921bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return the new string
922bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
9231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
924bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String replaceFrom(CharSequence sequence, char replacement) {
925bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    String string = sequence.toString();
926bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int pos = indexIn(string);
927bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (pos == -1) {
928bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return string;
929bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
930bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    char[] chars = string.toCharArray();
931bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    chars[pos] = replacement;
932bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = pos + 1; i < chars.length; i++) {
933bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (matches(chars[i])) {
934bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        chars[i] = replacement;
935bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
936bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
937bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return new String(chars);
938bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
939bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
940bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
9411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a string copy of the input character sequence, with each character that matches this
9421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * matcher replaced by a given replacement sequence. For example: <pre>   {@code
943bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
944bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
945bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
946bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "yoohoo"}.
947bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
9481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
9491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * off calling {@link #replaceFrom(CharSequence, char)} directly.
950bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
951bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @param sequence the character sequence to replace matching characters in
9521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @param replacement the characters to append to the result string in place of each matching
9531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *        character in {@code sequence}
954bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return the new string
955bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
9561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
957bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
958bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int replacementLen = replacement.length();
959bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (replacementLen == 0) {
960bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return removeFrom(sequence);
961bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
962bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (replacementLen == 1) {
963bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return replaceFrom(sequence, replacement.charAt(0));
964bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
965bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
966bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    String string = sequence.toString();
967bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int pos = indexIn(string);
968bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (pos == -1) {
969bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return string;
970bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
971bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
972bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int len = string.length();
9731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
974bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
975bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int oldpos = 0;
976bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    do {
977bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      buf.append(string, oldpos, pos);
978bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      buf.append(replacement);
979bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      oldpos = pos + 1;
980bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      pos = indexIn(string, oldpos);
981bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    } while (pos != -1);
982bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
983bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    buf.append(string, oldpos, len);
984bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return buf.toString();
985bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
986bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
987bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
9881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a substring of the input character sequence that omits all characters this matcher
9891d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * matches from the beginning and from the end of the string. For example: <pre>   {@code
990bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
991bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
992bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
993bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "cat"}.
994bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
9951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>Note that: <pre>   {@code
996bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
997bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
998bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
999bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... is equivalent to {@link String#trim()}.
1000bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
10011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
1002bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String trimFrom(CharSequence sequence) {
1003bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int len = sequence.length();
1004bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int first;
1005bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int last;
1006bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1007bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (first = 0; first < len; first++) {
1008bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (!matches(sequence.charAt(first))) {
1009bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        break;
1010bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
1011bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1012bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (last = len - 1; last > first; last--) {
1013bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (!matches(sequence.charAt(last))) {
1014bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        break;
1015bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
1016bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1017bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1018bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return sequence.subSequence(first, last + 1).toString();
1019bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
1020bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1021bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
10221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a substring of the input character sequence that omits all characters this matcher
10231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * matches from the beginning of the string. For example: <pre> {@code
1024bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1025bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1026bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1027bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "catbab"}.
1028bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
10291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
1030bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String trimLeadingFrom(CharSequence sequence) {
1031bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int len = sequence.length();
1032bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int first;
1033bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1034bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (first = 0; first < len; first++) {
1035bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (!matches(sequence.charAt(first))) {
1036bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        break;
1037bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
1038bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1039bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1040bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return sequence.subSequence(first, len).toString();
1041bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
1042bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1043bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
10441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a substring of the input character sequence that omits all characters this matcher
10451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * matches from the end of the string. For example: <pre> {@code
1046bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1047bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1048bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1049bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "abacat"}.
1050bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
10511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
1052bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String trimTrailingFrom(CharSequence sequence) {
1053bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int len = sequence.length();
1054bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int last;
1055bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1056bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (last = len - 1; last >= 0; last--) {
1057bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (!matches(sequence.charAt(last))) {
1058bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        break;
1059bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
1060bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1061bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1062bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return sequence.subSequence(0, last + 1).toString();
1063bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
1064bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1065bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
10661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Returns a string copy of the input character sequence, with each group of consecutive
10671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * characters that match this matcher replaced by a single replacement character. For example:
10681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <pre>   {@code
1069bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1070bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1071bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1072bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * ... returns {@code "b-p-r"}.
1073bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
10741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
10751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
10761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * character.
1077bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
10781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @param sequence the character sequence to replace matching groups of characters in
10791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * @param replacement the character to append to the result string in place of each group of
10801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   *        matching characters in {@code sequence}
1081bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @return the new string
1082bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
10831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
1084bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String collapseFrom(CharSequence sequence, char replacement) {
1085bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int first = indexIn(sequence);
1086bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (first == -1) {
1087bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return sequence.toString();
1088bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1089bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
10901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    // TODO(kevinb): see if this implementation can be made faster
1091bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    StringBuilder builder = new StringBuilder(sequence.length())
1092bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        .append(sequence.subSequence(0, first))
1093bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        .append(replacement);
1094bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    boolean in = true;
1095bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = first + 1; i < sequence.length(); i++) {
1096bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      char c = sequence.charAt(i);
1097bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (apply(c)) {
1098bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (!in) {
1099bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          builder.append(replacement);
1100bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          in = true;
1101bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
1102bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      } else {
1103bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        builder.append(c);
1104bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        in = false;
1105bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
1106bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1107bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return builder.toString();
1108bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
1109bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1110bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
11111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
11121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * groups of matching characters at the start or end of the sequence are removed without
11131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * replacement.
1114bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
11151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @CheckReturnValue
1116bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1117bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    int first = negate().indexIn(sequence);
1118bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    if (first == -1) {
1119bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      return ""; // everything matches. nothing's left.
1120bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1121bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    StringBuilder builder = new StringBuilder(sequence.length());
1122bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    boolean inMatchingGroup = false;
1123bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    for (int i = first; i < sequence.length(); i++) {
1124bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      char c = sequence.charAt(i);
1125bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      if (apply(c)) {
1126bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        inMatchingGroup = true;
1127bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      } else {
1128bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        if (inMatchingGroup) {
1129bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          builder.append(replacement);
1130bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor          inMatchingGroup = false;
1131bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        }
1132bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor        builder.append(c);
1133bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor      }
1134bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    }
1135bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return builder.toString();
1136bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
1137bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1138bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  // Predicate interface
1139bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor
1140bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  /**
1141bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * Returns {@code true} if this matcher matches the given character.
1142bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   *
1143bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   * @throws NullPointerException if {@code character} is null
1144bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor   */
11451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  @Override public boolean apply(Character character) {
1146bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor    return matches(character);
1147bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor  }
1148bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor}
1149