1/*
2 * Copyright (C) 2008 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.common.base;
18
19import static com.google.common.base.Preconditions.checkArgument;
20import static com.google.common.base.Preconditions.checkNotNull;
21
22import com.google.common.annotations.Beta;
23import com.google.common.annotations.GwtCompatible;
24
25import java.util.Arrays;
26
27import javax.annotation.CheckReturnValue;
28
29/**
30 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
31 * for any {@link Object}. Also offers basic text processing methods based on this function.
32 * Implementations are strongly encouraged to be side-effect-free and immutable.
33 *
34 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
35 * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
36 *
37 * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
38 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
39 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
40 * treats these just as two separate characters.
41 *
42 * <p>Example usages: <pre>
43 *   String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput);
44 *   if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
45 *
46 * <p>See the Guava User Guide article on <a href=
47 * "http://code.google.com/p/guava-libraries/wiki/StringsExplained#CharMatcher">
48 * {@code CharMatcher}</a>.
49 *
50 * @author Kevin Bourrillion
51 * @since 1.0
52 */
53@Beta // Possibly change from chars to code points; decide constants vs. methods
54@GwtCompatible(emulated = true)
55public abstract class CharMatcher implements Predicate<Character> {
56
57  // Constants
58  /**
59   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
60   * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
61   * discussion of that term.
62   *
63   * @since 2.0
64   */
65  public static final CharMatcher BREAKING_WHITESPACE = new CharMatcher() {
66    @Override
67    public boolean matches(char c) {
68      switch (c) {
69        case '\t':
70        case '\n':
71        case '\013':
72        case '\f':
73        case '\r':
74        case ' ':
75        case '\u0085':
76        case '\u1680':
77        case '\u2028':
78        case '\u2029':
79        case '\u205f':
80        case '\u3000':
81          return true;
82        case '\u2007':
83          return false;
84        default:
85          return c >= '\u2000' && c <= '\u200a';
86      }
87    }
88
89    @Override
90    public String toString() {
91      return "CharMatcher.BREAKING_WHITESPACE";
92    }
93  };
94
95  /**
96   * Determines whether a character is ASCII, meaning that its code point is less than 128.
97   */
98  public static final CharMatcher ASCII = inRange('\0', '\u007f', "CharMatcher.ASCII");
99
100  private static class RangesMatcher extends CharMatcher {
101    private final char[] rangeStarts;
102    private final char[] rangeEnds;
103
104    RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) {
105      super(description);
106      this.rangeStarts = rangeStarts;
107      this.rangeEnds = rangeEnds;
108      checkArgument(rangeStarts.length == rangeEnds.length);
109      for (int i = 0; i < rangeStarts.length; i++) {
110        checkArgument(rangeStarts[i] <= rangeEnds[i]);
111        if (i + 1 < rangeStarts.length) {
112          checkArgument(rangeEnds[i] < rangeStarts[i + 1]);
113        }
114      }
115    }
116
117    @Override
118    public boolean matches(char c) {
119      int index = Arrays.binarySearch(rangeStarts, c);
120      if (index >= 0) {
121        return true;
122      } else {
123        index = ~index - 1;
124        return index >= 0 && c <= rangeEnds[index];
125      }
126    }
127  }
128
129  // Must be in ascending order.
130  private static final String ZEROES = "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6"
131      + "\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1b50\u1bb0"
132      + "\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
133
134  private static final String NINES;
135  static {
136    StringBuilder builder = new StringBuilder(ZEROES.length());
137    for (int i = 0; i < ZEROES.length(); i++) {
138      builder.append((char) (ZEROES.charAt(i) + 9));
139    }
140    NINES = builder.toString();
141  }
142
143  /**
144   * Determines whether a character is a digit according to
145   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
146   * If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
147   */
148  public static final CharMatcher DIGIT = new RangesMatcher(
149      "CharMatcher.DIGIT", ZEROES.toCharArray(), NINES.toCharArray());
150
151  /**
152   * Determines whether a character is a digit according to {@linkplain Character#isDigit(char)
153   * Java's definition}. If you only care to match ASCII digits, you can use {@code
154   * inRange('0', '9')}.
155   */
156  public static final CharMatcher JAVA_DIGIT = new CharMatcher("CharMatcher.JAVA_DIGIT") {
157    @Override public boolean matches(char c) {
158      return Character.isDigit(c);
159    }
160  };
161
162  /**
163   * Determines whether a character is a letter according to {@linkplain Character#isLetter(char)
164   * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code
165   * inRange('a', 'z').or(inRange('A', 'Z'))}.
166   */
167  public static final CharMatcher JAVA_LETTER = new CharMatcher("CharMatcher.JAVA_LETTER") {
168    @Override public boolean matches(char c) {
169      return Character.isLetter(c);
170    }
171  };
172
173  /**
174   * Determines whether a character is a letter or digit according to {@linkplain
175   * Character#isLetterOrDigit(char) Java's definition}.
176   */
177  public static final CharMatcher JAVA_LETTER_OR_DIGIT =
178      new CharMatcher("CharMatcher.JAVA_LETTER_OR_DIGIT") {
179    @Override public boolean matches(char c) {
180      return Character.isLetterOrDigit(c);
181    }
182  };
183
184  /**
185   * Determines whether a character is upper case according to {@linkplain
186   * Character#isUpperCase(char) Java's definition}.
187   */
188  public static final CharMatcher JAVA_UPPER_CASE =
189      new CharMatcher("CharMatcher.JAVA_UPPER_CASE") {
190    @Override public boolean matches(char c) {
191      return Character.isUpperCase(c);
192    }
193  };
194
195  /**
196   * Determines whether a character is lower case according to {@linkplain
197   * Character#isLowerCase(char) Java's definition}.
198   */
199  public static final CharMatcher JAVA_LOWER_CASE =
200      new CharMatcher("CharMatcher.JAVA_LOWER_CASE") {
201    @Override public boolean matches(char c) {
202      return Character.isLowerCase(c);
203    }
204  };
205
206  /**
207   * Determines whether a character is an ISO control character as specified by {@link
208   * Character#isISOControl(char)}.
209   */
210  public static final CharMatcher JAVA_ISO_CONTROL =
211      inRange('\u0000', '\u001f')
212      .or(inRange('\u007f', '\u009f'))
213      .withToString("CharMatcher.JAVA_ISO_CONTROL");
214
215  /**
216   * Determines whether a character is invisible; that is, if its Unicode category is any of
217   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
218   * PRIVATE_USE according to ICU4J.
219   */
220  public static final CharMatcher INVISIBLE = new RangesMatcher("CharMatcher.INVISIBLE", (
221      "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067\u2068"
222      + "\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa").toCharArray(), (
223      "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067\u2068"
224      + "\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb").toCharArray());
225
226  private static String showCharacter(char c) {
227    String hex = "0123456789ABCDEF";
228    char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'};
229    for (int i = 0; i < 4; i++) {
230      tmp[5 - i] = hex.charAt(c & 0xF);
231      c >>= 4;
232    }
233    return String.copyValueOf(tmp);
234
235  }
236
237  /**
238   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
239   * errs on the side of returning {@code false} (that is, it tends to assume a character is
240   * double-width).
241   *
242   * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
243   * date.
244   */
245  public static final CharMatcher SINGLE_WIDTH = new RangesMatcher("CharMatcher.SINGLE_WIDTH",
246      "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(),
247      "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray());
248
249  /** Matches any character. */
250  public static final CharMatcher ANY =
251      new FastMatcher("CharMatcher.ANY") {
252        @Override public boolean matches(char c) {
253          return true;
254        }
255
256        @Override public int indexIn(CharSequence sequence) {
257          return (sequence.length() == 0) ? -1 : 0;
258        }
259
260        @Override public int indexIn(CharSequence sequence, int start) {
261          int length = sequence.length();
262          Preconditions.checkPositionIndex(start, length);
263          return (start == length) ? -1 : start;
264        }
265
266        @Override public int lastIndexIn(CharSequence sequence) {
267          return sequence.length() - 1;
268        }
269
270        @Override public boolean matchesAllOf(CharSequence sequence) {
271          checkNotNull(sequence);
272          return true;
273        }
274
275        @Override public boolean matchesNoneOf(CharSequence sequence) {
276          return sequence.length() == 0;
277        }
278
279        @Override public String removeFrom(CharSequence sequence) {
280          checkNotNull(sequence);
281          return "";
282        }
283
284        @Override public String replaceFrom(CharSequence sequence, char replacement) {
285          char[] array = new char[sequence.length()];
286          Arrays.fill(array, replacement);
287          return new String(array);
288        }
289
290        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
291          StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
292          for (int i = 0; i < sequence.length(); i++) {
293            retval.append(replacement);
294          }
295          return retval.toString();
296        }
297
298        @Override public String collapseFrom(CharSequence sequence, char replacement) {
299          return (sequence.length() == 0) ? "" : String.valueOf(replacement);
300        }
301
302        @Override public String trimFrom(CharSequence sequence) {
303          checkNotNull(sequence);
304          return "";
305        }
306
307        @Override public int countIn(CharSequence sequence) {
308          return sequence.length();
309        }
310
311        @Override public CharMatcher and(CharMatcher other) {
312          return checkNotNull(other);
313        }
314
315        @Override public CharMatcher or(CharMatcher other) {
316          checkNotNull(other);
317          return this;
318        }
319
320        @Override public CharMatcher negate() {
321          return NONE;
322        }
323      };
324
325  /** Matches no characters. */
326  public static final CharMatcher NONE =
327      new FastMatcher("CharMatcher.NONE") {
328        @Override public boolean matches(char c) {
329          return false;
330        }
331
332        @Override public int indexIn(CharSequence sequence) {
333          checkNotNull(sequence);
334          return -1;
335        }
336
337        @Override public int indexIn(CharSequence sequence, int start) {
338          int length = sequence.length();
339          Preconditions.checkPositionIndex(start, length);
340          return -1;
341        }
342
343        @Override public int lastIndexIn(CharSequence sequence) {
344          checkNotNull(sequence);
345          return -1;
346        }
347
348        @Override public boolean matchesAllOf(CharSequence sequence) {
349          return sequence.length() == 0;
350        }
351
352        @Override public boolean matchesNoneOf(CharSequence sequence) {
353          checkNotNull(sequence);
354          return true;
355        }
356
357        @Override public String removeFrom(CharSequence sequence) {
358          return sequence.toString();
359        }
360
361        @Override public String replaceFrom(CharSequence sequence, char replacement) {
362          return sequence.toString();
363        }
364
365        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
366          checkNotNull(replacement);
367          return sequence.toString();
368        }
369
370        @Override public String collapseFrom(CharSequence sequence, char replacement) {
371          return sequence.toString();
372        }
373
374        @Override public String trimFrom(CharSequence sequence) {
375          return sequence.toString();
376        }
377
378        @Override
379        public String trimLeadingFrom(CharSequence sequence) {
380          return sequence.toString();
381        }
382
383        @Override
384        public String trimTrailingFrom(CharSequence sequence) {
385          return sequence.toString();
386        }
387
388        @Override public int countIn(CharSequence sequence) {
389          checkNotNull(sequence);
390          return 0;
391        }
392
393        @Override public CharMatcher and(CharMatcher other) {
394          checkNotNull(other);
395          return this;
396        }
397
398        @Override public CharMatcher or(CharMatcher other) {
399          return checkNotNull(other);
400        }
401
402        @Override public CharMatcher negate() {
403          return ANY;
404        }
405      };
406
407  // Static factories
408
409  /**
410   * Returns a {@code char} matcher that matches only one specified character.
411   */
412  public static CharMatcher is(final char match) {
413    String description = "CharMatcher.is('" + showCharacter(match) + "')";
414    return new FastMatcher(description) {
415      @Override public boolean matches(char c) {
416        return c == match;
417      }
418
419      @Override public String replaceFrom(CharSequence sequence, char replacement) {
420        return sequence.toString().replace(match, replacement);
421      }
422
423      @Override public CharMatcher and(CharMatcher other) {
424        return other.matches(match) ? this : NONE;
425      }
426
427      @Override public CharMatcher or(CharMatcher other) {
428        return other.matches(match) ? other : super.or(other);
429      }
430
431      @Override public CharMatcher negate() {
432        return isNot(match);
433      }
434    };
435  }
436
437  /**
438   * Returns a {@code char} matcher that matches any character except the one specified.
439   *
440   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
441   */
442  public static CharMatcher isNot(final char match) {
443    String description = "CharMatcher.isNot('" + showCharacter(match) + "')";
444    return new FastMatcher(description) {
445      @Override public boolean matches(char c) {
446        return c != match;
447      }
448
449      @Override public CharMatcher and(CharMatcher other) {
450        return other.matches(match) ? super.and(other) : other;
451      }
452
453      @Override public CharMatcher or(CharMatcher other) {
454        return other.matches(match) ? ANY : this;
455      }
456
457      @Override public CharMatcher negate() {
458        return is(match);
459      }
460    };
461  }
462
463  /**
464   * Returns a {@code char} matcher that matches any character present in the given character
465   * sequence.
466   */
467  public static CharMatcher anyOf(final CharSequence sequence) {
468    switch (sequence.length()) {
469      case 0:
470        return NONE;
471      case 1:
472        return is(sequence.charAt(0));
473      case 2:
474        return isEither(sequence.charAt(0), sequence.charAt(1));
475      default:
476        // continue below to handle the general case
477    }
478    // TODO(user): is it potentially worth just going ahead and building a precomputed matcher?
479    final char[] chars = sequence.toString().toCharArray();
480    Arrays.sort(chars);
481    StringBuilder description = new StringBuilder("CharMatcher.anyOf(\"");
482    for (char c : chars) {
483      description.append(showCharacter(c));
484    }
485    description.append("\")");
486    return new CharMatcher(description.toString()) {
487      @Override public boolean matches(char c) {
488        return Arrays.binarySearch(chars, c) >= 0;
489      }
490    };
491  }
492
493  private static CharMatcher isEither(
494      final char match1,
495      final char match2) {
496    String description = "CharMatcher.anyOf(\"" +
497        showCharacter(match1) + showCharacter(match2) + "\")";
498    return new FastMatcher(description) {
499      @Override public boolean matches(char c) {
500        return c == match1 || c == match2;
501      }
502    };
503  }
504
505  /**
506   * Returns a {@code char} matcher that matches any character not present in the given character
507   * sequence.
508   */
509  public static CharMatcher noneOf(CharSequence sequence) {
510    return anyOf(sequence).negate();
511  }
512
513  /**
514   * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
515   * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
516   * CharMatcher.inRange('a', 'z')}.
517   *
518   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
519   */
520  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
521    checkArgument(endInclusive >= startInclusive);
522    String description = "CharMatcher.inRange('" +
523        showCharacter(startInclusive) + "', '" +
524        showCharacter(endInclusive) + "')";
525    return inRange(startInclusive, endInclusive, description);
526  }
527
528  static CharMatcher inRange(final char startInclusive, final char endInclusive,
529      String description) {
530    return new FastMatcher(description) {
531      @Override public boolean matches(char c) {
532        return startInclusive <= c && c <= endInclusive;
533      }
534    };
535  }
536
537  /**
538   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
539   * which operates on primitive {@code char} instances instead.
540   */
541  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
542    checkNotNull(predicate);
543    if (predicate instanceof CharMatcher) {
544      return (CharMatcher) predicate;
545    }
546    String description = "CharMatcher.forPredicate(" + predicate + ")";
547    return new CharMatcher(description) {
548      @Override public boolean matches(char c) {
549        return predicate.apply(c);
550      }
551
552      @Override public boolean apply(Character character) {
553        return predicate.apply(checkNotNull(character));
554      }
555    };
556  }
557
558  // State
559  final String description;
560
561  // Constructors
562
563  /**
564   * Sets the {@code toString()} from the given description.
565   */
566  CharMatcher(String description) {
567    this.description = description;
568  }
569
570  /**
571   * Constructor for use by subclasses. When subclassing, you may want to override
572   * {@code toString()} to provide a useful description.
573   */
574  protected CharMatcher() {
575    description = super.toString();
576  }
577
578  // Abstract methods
579
580  /** Determines a true or false value for the given character. */
581  public abstract boolean matches(char c);
582
583  // Non-static factories
584
585  /**
586   * Returns a matcher that matches any character not matched by this matcher.
587   */
588  public CharMatcher negate() {
589    return new NegatedMatcher(this);
590  }
591
592  private static class NegatedMatcher extends CharMatcher {
593    final CharMatcher original;
594
595    NegatedMatcher(String toString, CharMatcher original) {
596      super(toString);
597      this.original = original;
598    }
599
600    NegatedMatcher(CharMatcher original) {
601      this(original + ".negate()", original);
602    }
603
604    @Override public boolean matches(char c) {
605      return !original.matches(c);
606    }
607
608    @Override public boolean matchesAllOf(CharSequence sequence) {
609      return original.matchesNoneOf(sequence);
610    }
611
612    @Override public boolean matchesNoneOf(CharSequence sequence) {
613      return original.matchesAllOf(sequence);
614    }
615
616    @Override public int countIn(CharSequence sequence) {
617      return sequence.length() - original.countIn(sequence);
618    }
619
620    @Override public CharMatcher negate() {
621      return original;
622    }
623
624    @Override
625    CharMatcher withToString(String description) {
626      return new NegatedMatcher(description, original);
627    }
628  }
629
630  /**
631   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
632   */
633  public CharMatcher and(CharMatcher other) {
634    return new And(this, checkNotNull(other));
635  }
636
637  private static class And extends CharMatcher {
638    final CharMatcher first;
639    final CharMatcher second;
640
641    And(CharMatcher a, CharMatcher b) {
642      this(a, b, "CharMatcher.and(" + a + ", " + b + ")");
643    }
644
645    And(CharMatcher a, CharMatcher b, String description) {
646      super(description);
647      first = checkNotNull(a);
648      second = checkNotNull(b);
649    }
650
651    @Override
652    public boolean matches(char c) {
653      return first.matches(c) && second.matches(c);
654    }
655
656    @Override
657    CharMatcher withToString(String description) {
658      return new And(first, second, description);
659    }
660  }
661
662  /**
663   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
664   */
665  public CharMatcher or(CharMatcher other) {
666    return new Or(this, checkNotNull(other));
667  }
668
669  private static class Or extends CharMatcher {
670    final CharMatcher first;
671    final CharMatcher second;
672
673    Or(CharMatcher a, CharMatcher b, String description) {
674      super(description);
675      first = checkNotNull(a);
676      second = checkNotNull(b);
677    }
678
679    Or(CharMatcher a, CharMatcher b) {
680      this(a, b, "CharMatcher.or(" + a + ", " + b + ")");
681    }
682
683    @Override
684    public boolean matches(char c) {
685      return first.matches(c) || second.matches(c);
686    }
687
688    @Override
689    CharMatcher withToString(String description) {
690      return new Or(first, second, description);
691    }
692  }
693
694  /**
695   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
696   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
697   * worthwhile only if the precomputed matcher is queried many thousands of times.
698   *
699   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
700   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
701   * worthwhile tradeoff in a browser.
702   */
703  public CharMatcher precomputed() {
704    return Platform.precomputeCharMatcher(this);
705  }
706
707  /**
708   * Subclasses should provide a new CharMatcher with the same characteristics as {@code this},
709   * but with their {@code toString} method overridden with the new description.
710   *
711   * <p>This is unsupported by default.
712   */
713  CharMatcher withToString(String description) {
714    throw new UnsupportedOperationException();
715  }
716
717  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1;
718
719  /**
720   * A matcher for which precomputation will not yield any significant benefit.
721   */
722  abstract static class FastMatcher extends CharMatcher {
723    FastMatcher() {
724      super();
725    }
726
727    FastMatcher(String description) {
728      super(description);
729    }
730
731    @Override
732    public final CharMatcher precomputed() {
733      return this;
734    }
735
736    @Override
737    public CharMatcher negate() {
738      return new NegatedFastMatcher(this);
739    }
740  }
741
742  static final class NegatedFastMatcher extends NegatedMatcher {
743    NegatedFastMatcher(CharMatcher original) {
744      super(original);
745    }
746
747    NegatedFastMatcher(String toString, CharMatcher original) {
748      super(toString, original);
749    }
750
751    @Override
752    public final CharMatcher precomputed() {
753      return this;
754    }
755
756    @Override
757    CharMatcher withToString(String description) {
758      return new NegatedFastMatcher(description, original);
759    }
760  }
761
762  // Text processing routines
763
764  /**
765   * Returns {@code true} if a character sequence contains at least one matching character.
766   * Equivalent to {@code !matchesNoneOf(sequence)}.
767   *
768   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
769   * character, until this returns {@code true} or the end is reached.
770   *
771   * @param sequence the character sequence to examine, possibly empty
772   * @return {@code true} if this matcher matches at least one character in the sequence
773   * @since 8.0
774   */
775  public boolean matchesAnyOf(CharSequence sequence) {
776    return !matchesNoneOf(sequence);
777  }
778
779  /**
780   * Returns {@code true} if a character sequence contains only matching characters.
781   *
782   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
783   * character, until this returns {@code false} or the end is reached.
784   *
785   * @param sequence the character sequence to examine, possibly empty
786   * @return {@code true} if this matcher matches every character in the sequence, including when
787   *         the sequence is empty
788   */
789  public boolean matchesAllOf(CharSequence sequence) {
790    for (int i = sequence.length() - 1; i >= 0; i--) {
791      if (!matches(sequence.charAt(i))) {
792        return false;
793      }
794    }
795    return true;
796  }
797
798  /**
799   * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
800   * {@code !matchesAnyOf(sequence)}.
801   *
802   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
803   * character, until this returns {@code false} or the end is reached.
804   *
805   * @param sequence the character sequence to examine, possibly empty
806   * @return {@code true} if this matcher matches every character in the sequence, including when
807   *         the sequence is empty
808   */
809  public boolean matchesNoneOf(CharSequence sequence) {
810    return indexIn(sequence) == -1;
811  }
812
813  /**
814   * Returns the index of the first matching character in a character sequence, or {@code -1} if no
815   * matching character is present.
816   *
817   * <p>The default implementation iterates over the sequence in forward order calling {@link
818   * #matches} for each character.
819   *
820   * @param sequence the character sequence to examine from the beginning
821   * @return an index, or {@code -1} if no character matches
822   */
823  public int indexIn(CharSequence sequence) {
824    int length = sequence.length();
825    for (int i = 0; i < length; i++) {
826      if (matches(sequence.charAt(i))) {
827        return i;
828      }
829    }
830    return -1;
831  }
832
833  /**
834   * Returns the index of the first matching character in a character sequence, starting from a
835   * given position, or {@code -1} if no character matches after that position.
836   *
837   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
838   * start}, calling {@link #matches} for each character.
839   *
840   * @param sequence the character sequence to examine
841   * @param start the first index to examine; must be nonnegative and no greater than {@code
842   *        sequence.length()}
843   * @return the index of the first matching character, guaranteed to be no less than {@code start},
844   *         or {@code -1} if no character matches
845   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
846   *         sequence.length()}
847   */
848  public int indexIn(CharSequence sequence, int start) {
849    int length = sequence.length();
850    Preconditions.checkPositionIndex(start, length);
851    for (int i = start; i < length; i++) {
852      if (matches(sequence.charAt(i))) {
853        return i;
854      }
855    }
856    return -1;
857  }
858
859  /**
860   * Returns the index of the last matching character in a character sequence, or {@code -1} if no
861   * matching character is present.
862   *
863   * <p>The default implementation iterates over the sequence in reverse order calling {@link
864   * #matches} for each character.
865   *
866   * @param sequence the character sequence to examine from the end
867   * @return an index, or {@code -1} if no character matches
868   */
869  public int lastIndexIn(CharSequence sequence) {
870    for (int i = sequence.length() - 1; i >= 0; i--) {
871      if (matches(sequence.charAt(i))) {
872        return i;
873      }
874    }
875    return -1;
876  }
877
878  /**
879   * Returns the number of matching characters found in a character sequence.
880   */
881  public int countIn(CharSequence sequence) {
882    int count = 0;
883    for (int i = 0; i < sequence.length(); i++) {
884      if (matches(sequence.charAt(i))) {
885        count++;
886      }
887    }
888    return count;
889  }
890
891  /**
892   * Returns a string containing all non-matching characters of a character sequence, in order. For
893   * example: <pre>   {@code
894   *
895   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
896   *
897   * ... returns {@code "bzr"}.
898   */
899  @CheckReturnValue
900  public String removeFrom(CharSequence sequence) {
901    String string = sequence.toString();
902    int pos = indexIn(string);
903    if (pos == -1) {
904      return string;
905    }
906
907    char[] chars = string.toCharArray();
908    int spread = 1;
909
910    // This unusual loop comes from extensive benchmarking
911    OUT: while (true) {
912      pos++;
913      while (true) {
914        if (pos == chars.length) {
915          break OUT;
916        }
917        if (matches(chars[pos])) {
918          break;
919        }
920        chars[pos - spread] = chars[pos];
921        pos++;
922      }
923      spread++;
924    }
925    return new String(chars, 0, pos - spread);
926  }
927
928  /**
929   * Returns a string containing all matching characters of a character sequence, in order. For
930   * example: <pre>   {@code
931   *
932   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
933   *
934   * ... returns {@code "aaa"}.
935   */
936  @CheckReturnValue
937  public String retainFrom(CharSequence sequence) {
938    return negate().removeFrom(sequence);
939  }
940
941  /**
942   * Returns a string copy of the input character sequence, with each character that matches this
943   * matcher replaced by a given replacement character. For example: <pre>   {@code
944   *
945   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
946   *
947   * ... returns {@code "rodor"}.
948   *
949   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
950   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
951   * character.
952   *
953   * @param sequence the character sequence to replace matching characters in
954   * @param replacement the character to append to the result string in place of each matching
955   *        character in {@code sequence}
956   * @return the new string
957   */
958  @CheckReturnValue
959  public String replaceFrom(CharSequence sequence, char replacement) {
960    String string = sequence.toString();
961    int pos = indexIn(string);
962    if (pos == -1) {
963      return string;
964    }
965    char[] chars = string.toCharArray();
966    chars[pos] = replacement;
967    for (int i = pos + 1; i < chars.length; i++) {
968      if (matches(chars[i])) {
969        chars[i] = replacement;
970      }
971    }
972    return new String(chars);
973  }
974
975  /**
976   * Returns a string copy of the input character sequence, with each character that matches this
977   * matcher replaced by a given replacement sequence. For example: <pre>   {@code
978   *
979   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
980   *
981   * ... returns {@code "yoohoo"}.
982   *
983   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
984   * off calling {@link #replaceFrom(CharSequence, char)} directly.
985   *
986   * @param sequence the character sequence to replace matching characters in
987   * @param replacement the characters to append to the result string in place of each matching
988   *        character in {@code sequence}
989   * @return the new string
990   */
991  @CheckReturnValue
992  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
993    int replacementLen = replacement.length();
994    if (replacementLen == 0) {
995      return removeFrom(sequence);
996    }
997    if (replacementLen == 1) {
998      return replaceFrom(sequence, replacement.charAt(0));
999    }
1000
1001    String string = sequence.toString();
1002    int pos = indexIn(string);
1003    if (pos == -1) {
1004      return string;
1005    }
1006
1007    int len = string.length();
1008    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
1009
1010    int oldpos = 0;
1011    do {
1012      buf.append(string, oldpos, pos);
1013      buf.append(replacement);
1014      oldpos = pos + 1;
1015      pos = indexIn(string, oldpos);
1016    } while (pos != -1);
1017
1018    buf.append(string, oldpos, len);
1019    return buf.toString();
1020  }
1021
1022  /**
1023   * Returns a substring of the input character sequence that omits all characters this matcher
1024   * matches from the beginning and from the end of the string. For example: <pre>   {@code
1025   *
1026   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
1027   *
1028   * ... returns {@code "cat"}.
1029   *
1030   * <p>Note that: <pre>   {@code
1031   *
1032   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
1033   *
1034   * ... is equivalent to {@link String#trim()}.
1035   */
1036  @CheckReturnValue
1037  public String trimFrom(CharSequence sequence) {
1038    int len = sequence.length();
1039    int first;
1040    int last;
1041
1042    for (first = 0; first < len; first++) {
1043      if (!matches(sequence.charAt(first))) {
1044        break;
1045      }
1046    }
1047    for (last = len - 1; last > first; last--) {
1048      if (!matches(sequence.charAt(last))) {
1049        break;
1050      }
1051    }
1052
1053    return sequence.subSequence(first, last + 1).toString();
1054  }
1055
1056  /**
1057   * Returns a substring of the input character sequence that omits all characters this matcher
1058   * matches from the beginning of the string. For example: <pre> {@code
1059   *
1060   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1061   *
1062   * ... returns {@code "catbab"}.
1063   */
1064  @CheckReturnValue
1065  public String trimLeadingFrom(CharSequence sequence) {
1066    int len = sequence.length();
1067    for (int first = 0; first < len; first++) {
1068      if (!matches(sequence.charAt(first))) {
1069        return sequence.subSequence(first, len).toString();
1070      }
1071    }
1072    return "";
1073  }
1074
1075  /**
1076   * Returns a substring of the input character sequence that omits all characters this matcher
1077   * matches from the end of the string. For example: <pre> {@code
1078   *
1079   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1080   *
1081   * ... returns {@code "abacat"}.
1082   */
1083  @CheckReturnValue
1084  public String trimTrailingFrom(CharSequence sequence) {
1085    int len = sequence.length();
1086    for (int last = len - 1; last >= 0; last--) {
1087      if (!matches(sequence.charAt(last))) {
1088        return sequence.subSequence(0, last + 1).toString();
1089      }
1090    }
1091    return "";
1092  }
1093
1094  /**
1095   * Returns a string copy of the input character sequence, with each group of consecutive
1096   * characters that match this matcher replaced by a single replacement character. For example:
1097   * <pre>   {@code
1098   *
1099   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1100   *
1101   * ... returns {@code "b-p-r"}.
1102   *
1103   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1104   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1105   * character.
1106   *
1107   * @param sequence the character sequence to replace matching groups of characters in
1108   * @param replacement the character to append to the result string in place of each group of
1109   *        matching characters in {@code sequence}
1110   * @return the new string
1111   */
1112  @CheckReturnValue
1113  public String collapseFrom(CharSequence sequence, char replacement) {
1114    // This implementation avoids unnecessary allocation.
1115    int len = sequence.length();
1116    for (int i = 0; i < len; i++) {
1117      char c = sequence.charAt(i);
1118      if (matches(c)) {
1119        if (c == replacement
1120            && (i == len - 1 || !matches(sequence.charAt(i + 1)))) {
1121          // a no-op replacement
1122          i++;
1123        } else {
1124          StringBuilder builder = new StringBuilder(len)
1125              .append(sequence.subSequence(0, i))
1126              .append(replacement);
1127          return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true);
1128        }
1129      }
1130    }
1131    // no replacement needed
1132    return sequence.toString();
1133  }
1134
1135  /**
1136   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1137   * groups of matching characters at the start or end of the sequence are removed without
1138   * replacement.
1139   */
1140  @CheckReturnValue
1141  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1142    // This implementation avoids unnecessary allocation.
1143    int len = sequence.length();
1144    int first;
1145    int last;
1146
1147    for (first = 0; first < len && matches(sequence.charAt(first)); first++) {}
1148    for (last = len - 1; last > first && matches(sequence.charAt(last)); last--) {}
1149
1150    return (first == 0 && last == len - 1)
1151        ? collapseFrom(sequence, replacement)
1152        : finishCollapseFrom(
1153              sequence, first, last + 1, replacement,
1154              new StringBuilder(last + 1 - first),
1155              false);
1156  }
1157
1158  private String finishCollapseFrom(
1159      CharSequence sequence, int start, int end, char replacement,
1160      StringBuilder builder, boolean inMatchingGroup) {
1161    for (int i = start; i < end; i++) {
1162      char c = sequence.charAt(i);
1163      if (matches(c)) {
1164        if (!inMatchingGroup) {
1165          builder.append(replacement);
1166          inMatchingGroup = true;
1167        }
1168      } else {
1169        builder.append(c);
1170        inMatchingGroup = false;
1171      }
1172    }
1173    return builder.toString();
1174  }
1175
1176  /**
1177   * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches}
1178   *     instead.
1179   */
1180  @Deprecated
1181  @Override
1182  public boolean apply(Character character) {
1183    return matches(character);
1184  }
1185
1186  /**
1187   * Returns a string representation of this {@code CharMatcher}, such as
1188   * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}.
1189   */
1190  @Override
1191  public String toString() {
1192    return description;
1193  }
1194
1195  static final String WHITESPACE_TABLE = ""
1196      + "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000"
1197      + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680"
1198      + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009"
1199      + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000";
1200  static final int WHITESPACE_MULTIPLIER = 1682554634;
1201  static final int WHITESPACE_SHIFT = Integer.numberOfLeadingZeros(WHITESPACE_TABLE.length() - 1);
1202
1203  /**
1204   * Determines whether a character is whitespace according to the latest Unicode standard, as
1205   * illustrated
1206   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
1207   * This is not the same definition used by other Java APIs. (See a
1208   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
1209   * definitions of "whitespace"</a>.)
1210   *
1211   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
1212   * to date.
1213   */
1214  public static final CharMatcher WHITESPACE = new FastMatcher("WHITESPACE") {
1215    @Override
1216    public boolean matches(char c) {
1217      return WHITESPACE_TABLE.charAt((WHITESPACE_MULTIPLIER * c) >>> WHITESPACE_SHIFT) == c;
1218    }
1219  };
1220}
1221