1/*
2 * Copyright (C) 2008 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.common.base;
18
19import static com.google.common.base.Preconditions.checkArgument;
20import static com.google.common.base.Preconditions.checkNotNull;
21
22import com.google.common.annotations.Beta;
23import com.google.common.annotations.GwtCompatible;
24
25import java.util.ArrayList;
26import java.util.Arrays;
27import java.util.List;
28
29import javax.annotation.CheckReturnValue;
30
31/**
32 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
33 * for any {@link Object}. Also offers basic text processing methods based on this function.
34 * Implementations are strongly encouraged to be side-effect-free and immutable.
35 *
36 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
37 * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
38 *
39 * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
40 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
41 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
42 * treats these just as two separate characters.
43 *
44 * <p>Example usages: <pre>
45 *   String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput);
46 *   if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
47 *
48 * @author Kevin Bourrillion
49 * @since 1.0
50 */
51@Beta // Possibly change from chars to code points; decide constants vs. methods
52@GwtCompatible
53public abstract class CharMatcher implements Predicate<Character> {
54  // Constants
55
56  // Excludes 2000-2000a, which is handled as a range
57  private static final String BREAKING_WHITESPACE_CHARS =
58      "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
59
60  // Excludes 2007, which is handled as a gap in a pair of ranges
61  private static final String NON_BREAKING_WHITESPACE_CHARS =
62      "\u00a0\u180e\u202f";
63
64  /**
65   * Determines whether a character is whitespace according to the latest Unicode standard, as
66   * illustrated
67   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
68   * This is not the same definition used by other Java APIs. (See a
69   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
70   * definitions of "whitespace"</a>.)
71   *
72   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
73   * to date.
74   */
75  public static final CharMatcher WHITESPACE =
76      anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
77          .or(inRange('\u2000', '\u200a'))
78          .precomputed();
79
80  /**
81   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
82   * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
83   * discussion of that term.
84   *
85   * @since 2.0
86   */
87  public static final CharMatcher BREAKING_WHITESPACE =
88      anyOf(BREAKING_WHITESPACE_CHARS)
89          .or(inRange('\u2000', '\u2006'))
90          .or(inRange('\u2008', '\u200a'))
91          .precomputed();
92
93  /**
94   * Determines whether a character is ASCII, meaning that its code point is less than 128.
95   */
96  public static final CharMatcher ASCII = inRange('\0', '\u007f');
97
98  /**
99   * Determines whether a character is a digit according to
100   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
101   */
102  public static final CharMatcher DIGIT;
103
104  static {
105    CharMatcher digit = inRange('0', '9');
106    String zeroes =
107        "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
108            + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
109            + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
110    for (char base : zeroes.toCharArray()) {
111      digit = digit.or(inRange(base, (char) (base + 9)));
112    }
113    DIGIT = digit.precomputed();
114  }
115
116  /**
117   * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
118   * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
119   */
120  public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
121    @Override public boolean matches(char c) {
122      return Character.isDigit(c);
123    }
124  };
125
126  /**
127   * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
128   * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
129   * inRange('a', 'z').or(inRange('A', 'Z'))}.
130   */
131  public static final CharMatcher JAVA_LETTER = new CharMatcher() {
132    @Override public boolean matches(char c) {
133      return Character.isLetter(c);
134    }
135  };
136
137  /**
138   * Determines whether a character is a letter or digit according to {@link
139   * Character#isLetterOrDigit(char) Java's definition}.
140   */
141  public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
142    @Override public boolean matches(char c) {
143      return Character.isLetterOrDigit(c);
144    }
145  };
146
147  /**
148   * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
149   * Java's definition}.
150   */
151  public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
152    @Override public boolean matches(char c) {
153      return Character.isUpperCase(c);
154    }
155  };
156
157  /**
158   * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
159   * Java's definition}.
160   */
161  public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
162    @Override public boolean matches(char c) {
163      return Character.isLowerCase(c);
164    }
165  };
166
167  /**
168   * Determines whether a character is an ISO control character as specified by {@link
169   * Character#isISOControl(char)}.
170   */
171  public static final CharMatcher JAVA_ISO_CONTROL =
172      inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
173
174  /**
175   * Determines whether a character is invisible; that is, if its Unicode category is any of
176   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
177   * PRIVATE_USE according to ICU4J.
178   */
179  public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
180      .or(inRange('\u007f', '\u00a0'))
181      .or(is('\u00ad'))
182      .or(inRange('\u0600', '\u0603'))
183      .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
184      .or(inRange('\u2000', '\u200f'))
185      .or(inRange('\u2028', '\u202f'))
186      .or(inRange('\u205f', '\u2064'))
187      .or(inRange('\u206a', '\u206f'))
188      .or(is('\u3000'))
189      .or(inRange('\ud800', '\uf8ff'))
190      .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
191      .precomputed();
192
193  /**
194   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
195   * errs on the side of returning {@code false} (that is, it tends to assume a character is
196   * double-width).
197   *
198   * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
199   * date.
200   */
201  public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
202      .or(is('\u05be'))
203      .or(inRange('\u05d0', '\u05ea'))
204      .or(is('\u05f3'))
205      .or(is('\u05f4'))
206      .or(inRange('\u0600', '\u06ff'))
207      .or(inRange('\u0750', '\u077f'))
208      .or(inRange('\u0e00', '\u0e7f'))
209      .or(inRange('\u1e00', '\u20af'))
210      .or(inRange('\u2100', '\u213a'))
211      .or(inRange('\ufb50', '\ufdff'))
212      .or(inRange('\ufe70', '\ufeff'))
213      .or(inRange('\uff61', '\uffdc'))
214      .precomputed();
215
216  /** Matches any character. */
217  public static final CharMatcher ANY =
218      new CharMatcher() {
219        @Override public boolean matches(char c) {
220          return true;
221        }
222
223        @Override public int indexIn(CharSequence sequence) {
224          return (sequence.length() == 0) ? -1 : 0;
225        }
226
227        @Override public int indexIn(CharSequence sequence, int start) {
228          int length = sequence.length();
229          Preconditions.checkPositionIndex(start, length);
230          return (start == length) ? -1 : start;
231        }
232
233        @Override public int lastIndexIn(CharSequence sequence) {
234          return sequence.length() - 1;
235        }
236
237        @Override public boolean matchesAllOf(CharSequence sequence) {
238          checkNotNull(sequence);
239          return true;
240        }
241
242        @Override public boolean matchesNoneOf(CharSequence sequence) {
243          return sequence.length() == 0;
244        }
245
246        @Override public String removeFrom(CharSequence sequence) {
247          checkNotNull(sequence);
248          return "";
249        }
250
251        @Override public String replaceFrom(CharSequence sequence, char replacement) {
252          char[] array = new char[sequence.length()];
253          Arrays.fill(array, replacement);
254          return new String(array);
255        }
256
257        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
258          StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
259          for (int i = 0; i < sequence.length(); i++) {
260            retval.append(replacement);
261          }
262          return retval.toString();
263        }
264
265        @Override public String collapseFrom(CharSequence sequence, char replacement) {
266          return (sequence.length() == 0) ? "" : String.valueOf(replacement);
267        }
268
269        @Override public String trimFrom(CharSequence sequence) {
270          checkNotNull(sequence);
271          return "";
272        }
273
274        @Override public int countIn(CharSequence sequence) {
275          return sequence.length();
276        }
277
278        @Override public CharMatcher and(CharMatcher other) {
279          return checkNotNull(other);
280        }
281
282        @Override public CharMatcher or(CharMatcher other) {
283          checkNotNull(other);
284          return this;
285        }
286
287        @Override public CharMatcher negate() {
288          return NONE;
289        }
290
291        @Override public CharMatcher precomputed() {
292          return this;
293        }
294      };
295
296  /** Matches no characters. */
297  public static final CharMatcher NONE =
298      new CharMatcher() {
299        @Override public boolean matches(char c) {
300          return false;
301        }
302
303        @Override public int indexIn(CharSequence sequence) {
304          checkNotNull(sequence);
305          return -1;
306        }
307
308        @Override public int indexIn(CharSequence sequence, int start) {
309          int length = sequence.length();
310          Preconditions.checkPositionIndex(start, length);
311          return -1;
312        }
313
314        @Override public int lastIndexIn(CharSequence sequence) {
315          checkNotNull(sequence);
316          return -1;
317        }
318
319        @Override public boolean matchesAllOf(CharSequence sequence) {
320          return sequence.length() == 0;
321        }
322
323        @Override public boolean matchesNoneOf(CharSequence sequence) {
324          checkNotNull(sequence);
325          return true;
326        }
327
328        @Override public String removeFrom(CharSequence sequence) {
329          return sequence.toString();
330        }
331
332        @Override public String replaceFrom(CharSequence sequence, char replacement) {
333          return sequence.toString();
334        }
335
336        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
337          checkNotNull(replacement);
338          return sequence.toString();
339        }
340
341        @Override public String collapseFrom(CharSequence sequence, char replacement) {
342          return sequence.toString();
343        }
344
345        @Override public String trimFrom(CharSequence sequence) {
346          return sequence.toString();
347        }
348
349        @Override public int countIn(CharSequence sequence) {
350          checkNotNull(sequence);
351          return 0;
352        }
353
354        @Override public CharMatcher and(CharMatcher other) {
355          checkNotNull(other);
356          return this;
357        }
358
359        @Override public CharMatcher or(CharMatcher other) {
360          return checkNotNull(other);
361        }
362
363        @Override public CharMatcher negate() {
364          return ANY;
365        }
366
367        @Override void setBits(LookupTable table) {}
368
369        @Override public CharMatcher precomputed() {
370          return this;
371        }
372      };
373
374  // Static factories
375
376  /**
377   * Returns a {@code char} matcher that matches only one specified character.
378   */
379  public static CharMatcher is(final char match) {
380    return new CharMatcher() {
381      @Override public boolean matches(char c) {
382        return c == match;
383      }
384
385      @Override public String replaceFrom(CharSequence sequence, char replacement) {
386        return sequence.toString().replace(match, replacement);
387      }
388
389      @Override public CharMatcher and(CharMatcher other) {
390        return other.matches(match) ? this : NONE;
391      }
392
393      @Override public CharMatcher or(CharMatcher other) {
394        return other.matches(match) ? other : super.or(other);
395      }
396
397      @Override public CharMatcher negate() {
398        return isNot(match);
399      }
400
401      @Override void setBits(LookupTable table) {
402        table.set(match);
403      }
404
405      @Override public CharMatcher precomputed() {
406        return this;
407      }
408    };
409  }
410
411  /**
412   * Returns a {@code char} matcher that matches any character except the one specified.
413   *
414   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
415   */
416  public static CharMatcher isNot(final char match) {
417    return new CharMatcher() {
418      @Override public boolean matches(char c) {
419        return c != match;
420      }
421
422      @Override public CharMatcher and(CharMatcher other) {
423        return other.matches(match) ? super.and(other) : other;
424      }
425
426      @Override public CharMatcher or(CharMatcher other) {
427        return other.matches(match) ? ANY : this;
428      }
429
430      @Override public CharMatcher negate() {
431        return is(match);
432      }
433    };
434  }
435
436  /**
437   * Returns a {@code char} matcher that matches any character present in the given character
438   * sequence.
439   */
440  public static CharMatcher anyOf(final CharSequence sequence) {
441    switch (sequence.length()) {
442      case 0:
443        return NONE;
444      case 1:
445        return is(sequence.charAt(0));
446      case 2:
447        final char match1 = sequence.charAt(0);
448        final char match2 = sequence.charAt(1);
449        return new CharMatcher() {
450          @Override public boolean matches(char c) {
451            return c == match1 || c == match2;
452          }
453
454          @Override void setBits(LookupTable table) {
455            table.set(match1);
456            table.set(match2);
457          }
458
459          @Override public CharMatcher precomputed() {
460            return this;
461          }
462        };
463    }
464
465    final char[] chars = sequence.toString().toCharArray();
466    Arrays.sort(chars); // not worth collapsing duplicates
467
468    return new CharMatcher() {
469      @Override public boolean matches(char c) {
470        return Arrays.binarySearch(chars, c) >= 0;
471      }
472
473      @Override void setBits(LookupTable table) {
474        for (char c : chars) {
475          table.set(c);
476        }
477      }
478    };
479  }
480
481  /**
482   * Returns a {@code char} matcher that matches any character not present in the given character
483   * sequence.
484   */
485  public static CharMatcher noneOf(CharSequence sequence) {
486    return anyOf(sequence).negate();
487  }
488
489  /**
490   * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
491   * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
492   * CharMatcher.inRange('a', 'z')}.
493   *
494   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
495   */
496  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
497    checkArgument(endInclusive >= startInclusive);
498    return new CharMatcher() {
499      @Override public boolean matches(char c) {
500        return startInclusive <= c && c <= endInclusive;
501      }
502
503      @Override void setBits(LookupTable table) {
504        char c = startInclusive;
505        while (true) {
506          table.set(c);
507          if (c++ == endInclusive) {
508            break;
509          }
510        }
511      }
512
513      @Override public CharMatcher precomputed() {
514        return this;
515      }
516    };
517  }
518
519  /**
520   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
521   * which operates on primitive {@code char} instances instead.
522   */
523  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
524    checkNotNull(predicate);
525    if (predicate instanceof CharMatcher) {
526      return (CharMatcher) predicate;
527    }
528    return new CharMatcher() {
529      @Override public boolean matches(char c) {
530        return predicate.apply(c);
531      }
532
533      @Override public boolean apply(Character character) {
534        return predicate.apply(checkNotNull(character));
535      }
536    };
537  }
538
539  // Constructors
540
541  /**
542   * Constructor for use by subclasses.
543   */
544  protected CharMatcher() {}
545
546  // Abstract methods
547
548  /** Determines a true or false value for the given character. */
549  public abstract boolean matches(char c);
550
551  // Non-static factories
552
553  /**
554   * Returns a matcher that matches any character not matched by this matcher.
555   */
556  public CharMatcher negate() {
557    final CharMatcher original = this;
558    return new CharMatcher() {
559      @Override public boolean matches(char c) {
560        return !original.matches(c);
561      }
562
563      @Override public boolean matchesAllOf(CharSequence sequence) {
564        return original.matchesNoneOf(sequence);
565      }
566
567      @Override public boolean matchesNoneOf(CharSequence sequence) {
568        return original.matchesAllOf(sequence);
569      }
570
571      @Override public int countIn(CharSequence sequence) {
572        return sequence.length() - original.countIn(sequence);
573      }
574
575      @Override public CharMatcher negate() {
576        return original;
577      }
578    };
579  }
580
581  /**
582   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
583   */
584  public CharMatcher and(CharMatcher other) {
585    return new And(Arrays.asList(this, checkNotNull(other)));
586  }
587
588  private static class And extends CharMatcher {
589    List<CharMatcher> components;
590
591    And(List<CharMatcher> components) {
592      this.components = components; // Skip defensive copy (private)
593    }
594
595    @Override public boolean matches(char c) {
596      for (CharMatcher matcher : components) {
597        if (!matcher.matches(c)) {
598          return false;
599        }
600      }
601      return true;
602    }
603
604    @Override public CharMatcher and(CharMatcher other) {
605      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
606      newComponents.add(checkNotNull(other));
607      return new And(newComponents);
608    }
609  }
610
611  /**
612   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
613   */
614  public CharMatcher or(CharMatcher other) {
615    return new Or(Arrays.asList(this, checkNotNull(other)));
616  }
617
618  private static class Or extends CharMatcher {
619    List<CharMatcher> components;
620
621    Or(List<CharMatcher> components) {
622      this.components = components; // Skip defensive copy (private)
623    }
624
625    @Override public boolean matches(char c) {
626      for (CharMatcher matcher : components) {
627        if (matcher.matches(c)) {
628          return true;
629        }
630      }
631      return false;
632    }
633
634    @Override public CharMatcher or(CharMatcher other) {
635      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
636      newComponents.add(checkNotNull(other));
637      return new Or(newComponents);
638    }
639
640    @Override void setBits(LookupTable table) {
641      for (CharMatcher matcher : components) {
642        matcher.setBits(table);
643      }
644    }
645  }
646
647  /**
648   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
649   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
650   * worthwhile only if the precomputed matcher is queried many thousands of times.
651   *
652   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
653   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
654   * worthwhile tradeoff in a browser.
655   */
656  public CharMatcher precomputed() {
657    return Platform.precomputeCharMatcher(this);
658  }
659
660  /**
661   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
662   * on {@link Platform} so that we can have different behavior in GWT.
663   *
664   * <p>The default precomputation is to cache the configuration of the original matcher in an
665   * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
666   * than the original.
667   *
668   * <p>The default implementation creates a new bit array and passes it to {@link
669   * #setBits(LookupTable)}.
670   */
671  CharMatcher precomputedInternal() {
672    final LookupTable table = new LookupTable();
673    setBits(table);
674
675    return new CharMatcher() {
676      @Override public boolean matches(char c) {
677        return table.get(c);
678      }
679
680      // TODO(kevinb): make methods like negate() smart?
681
682      @Override public CharMatcher precomputed() {
683        return this;
684      }
685    };
686  }
687
688  /**
689   * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
690   * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
691   *
692   * <p>The default implementation loops over every possible character value, invoking {@link
693   * #matches} for each one.
694   */
695  void setBits(LookupTable table) {
696    char c = Character.MIN_VALUE;
697    while (true) {
698      if (matches(c)) {
699        table.set(c);
700      }
701      if (c++ == Character.MAX_VALUE) {
702        break;
703      }
704    }
705  }
706
707  /**
708   * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
709   *
710   * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
711   * simpler java.util.BitSet.
712   */
713  private static final class LookupTable {
714    int[] data = new int[2048];
715
716    void set(char index) {
717      data[index >> 5] |= (1 << index);
718    }
719
720    boolean get(char index) {
721      return (data[index >> 5] & (1 << index)) != 0;
722    }
723  }
724
725  // Text processing routines
726
727  /**
728   * Returns {@code true} if a character sequence contains at least one matching character.
729   * Equivalent to {@code !matchesNoneOf(sequence)}.
730   *
731   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
732   * character, until this returns {@code true} or the end is reached.
733   *
734   * @param sequence the character sequence to examine, possibly empty
735   * @return {@code true} if this matcher matches at least one character in the sequence
736   * @since 8.0
737   */
738  public boolean matchesAnyOf(CharSequence sequence) {
739    return !matchesNoneOf(sequence);
740  }
741
742  /**
743   * Returns {@code true} if a character sequence contains only matching characters.
744   *
745   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
746   * character, until this returns {@code false} or the end is reached.
747   *
748   * @param sequence the character sequence to examine, possibly empty
749   * @return {@code true} if this matcher matches every character in the sequence, including when
750   *         the sequence is empty
751   */
752  public boolean matchesAllOf(CharSequence sequence) {
753    for (int i = sequence.length() - 1; i >= 0; i--) {
754      if (!matches(sequence.charAt(i))) {
755        return false;
756      }
757    }
758    return true;
759  }
760
761  /**
762   * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
763   * {@code !matchesAnyOf(sequence)}.
764   *
765   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
766   * character, until this returns {@code false} or the end is reached.
767   *
768   * @param sequence the character sequence to examine, possibly empty
769   * @return {@code true} if this matcher matches every character in the sequence, including when
770   *         the sequence is empty
771   */
772  public boolean matchesNoneOf(CharSequence sequence) {
773    return indexIn(sequence) == -1;
774  }
775
776  // TODO(kevinb): add matchesAnyOf()
777
778  /**
779   * Returns the index of the first matching character in a character sequence, or {@code -1} if no
780   * matching character is present.
781   *
782   * <p>The default implementation iterates over the sequence in forward order calling {@link
783   * #matches} for each character.
784   *
785   * @param sequence the character sequence to examine from the beginning
786   * @return an index, or {@code -1} if no character matches
787   */
788  public int indexIn(CharSequence sequence) {
789    int length = sequence.length();
790    for (int i = 0; i < length; i++) {
791      if (matches(sequence.charAt(i))) {
792        return i;
793      }
794    }
795    return -1;
796  }
797
798  /**
799   * Returns the index of the first matching character in a character sequence, starting from a
800   * given position, or {@code -1} if no character matches after that position.
801   *
802   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
803   * start}, calling {@link #matches} for each character.
804   *
805   * @param sequence the character sequence to examine
806   * @param start the first index to examine; must be nonnegative and no greater than {@code
807   *        sequence.length()}
808   * @return the index of the first matching character, guaranteed to be no less than {@code start},
809   *         or {@code -1} if no character matches
810   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
811   *         sequence.length()}
812   */
813  public int indexIn(CharSequence sequence, int start) {
814    int length = sequence.length();
815    Preconditions.checkPositionIndex(start, length);
816    for (int i = start; i < length; i++) {
817      if (matches(sequence.charAt(i))) {
818        return i;
819      }
820    }
821    return -1;
822  }
823
824  /**
825   * Returns the index of the last matching character in a character sequence, or {@code -1} if no
826   * matching character is present.
827   *
828   * <p>The default implementation iterates over the sequence in reverse order calling {@link
829   * #matches} for each character.
830   *
831   * @param sequence the character sequence to examine from the end
832   * @return an index, or {@code -1} if no character matches
833   */
834  public int lastIndexIn(CharSequence sequence) {
835    for (int i = sequence.length() - 1; i >= 0; i--) {
836      if (matches(sequence.charAt(i))) {
837        return i;
838      }
839    }
840    return -1;
841  }
842
843  /**
844   * Returns the number of matching characters found in a character sequence.
845   */
846  public int countIn(CharSequence sequence) {
847    int count = 0;
848    for (int i = 0; i < sequence.length(); i++) {
849      if (matches(sequence.charAt(i))) {
850        count++;
851      }
852    }
853    return count;
854  }
855
856  /**
857   * Returns a string containing all non-matching characters of a character sequence, in order. For
858   * example: <pre>   {@code
859   *
860   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
861   *
862   * ... returns {@code "bzr"}.
863   */
864  @CheckReturnValue
865  public String removeFrom(CharSequence sequence) {
866    String string = sequence.toString();
867    int pos = indexIn(string);
868    if (pos == -1) {
869      return string;
870    }
871
872    char[] chars = string.toCharArray();
873    int spread = 1;
874
875    // This unusual loop comes from extensive benchmarking
876    OUT: while (true) {
877      pos++;
878      while (true) {
879        if (pos == chars.length) {
880          break OUT;
881        }
882        if (matches(chars[pos])) {
883          break;
884        }
885        chars[pos - spread] = chars[pos];
886        pos++;
887      }
888      spread++;
889    }
890    return new String(chars, 0, pos - spread);
891  }
892
893  /**
894   * Returns a string containing all matching characters of a character sequence, in order. For
895   * example: <pre>   {@code
896   *
897   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
898   *
899   * ... returns {@code "aaa"}.
900   */
901  @CheckReturnValue
902  public String retainFrom(CharSequence sequence) {
903    return negate().removeFrom(sequence);
904  }
905
906  /**
907   * Returns a string copy of the input character sequence, with each character that matches this
908   * matcher replaced by a given replacement character. For example: <pre>   {@code
909   *
910   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
911   *
912   * ... returns {@code "rodor"}.
913   *
914   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
915   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
916   * character.
917   *
918   * @param sequence the character sequence to replace matching characters in
919   * @param replacement the character to append to the result string in place of each matching
920   *        character in {@code sequence}
921   * @return the new string
922   */
923  @CheckReturnValue
924  public String replaceFrom(CharSequence sequence, char replacement) {
925    String string = sequence.toString();
926    int pos = indexIn(string);
927    if (pos == -1) {
928      return string;
929    }
930    char[] chars = string.toCharArray();
931    chars[pos] = replacement;
932    for (int i = pos + 1; i < chars.length; i++) {
933      if (matches(chars[i])) {
934        chars[i] = replacement;
935      }
936    }
937    return new String(chars);
938  }
939
940  /**
941   * Returns a string copy of the input character sequence, with each character that matches this
942   * matcher replaced by a given replacement sequence. For example: <pre>   {@code
943   *
944   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
945   *
946   * ... returns {@code "yoohoo"}.
947   *
948   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
949   * off calling {@link #replaceFrom(CharSequence, char)} directly.
950   *
951   * @param sequence the character sequence to replace matching characters in
952   * @param replacement the characters to append to the result string in place of each matching
953   *        character in {@code sequence}
954   * @return the new string
955   */
956  @CheckReturnValue
957  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
958    int replacementLen = replacement.length();
959    if (replacementLen == 0) {
960      return removeFrom(sequence);
961    }
962    if (replacementLen == 1) {
963      return replaceFrom(sequence, replacement.charAt(0));
964    }
965
966    String string = sequence.toString();
967    int pos = indexIn(string);
968    if (pos == -1) {
969      return string;
970    }
971
972    int len = string.length();
973    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
974
975    int oldpos = 0;
976    do {
977      buf.append(string, oldpos, pos);
978      buf.append(replacement);
979      oldpos = pos + 1;
980      pos = indexIn(string, oldpos);
981    } while (pos != -1);
982
983    buf.append(string, oldpos, len);
984    return buf.toString();
985  }
986
987  /**
988   * Returns a substring of the input character sequence that omits all characters this matcher
989   * matches from the beginning and from the end of the string. For example: <pre>   {@code
990   *
991   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
992   *
993   * ... returns {@code "cat"}.
994   *
995   * <p>Note that: <pre>   {@code
996   *
997   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
998   *
999   * ... is equivalent to {@link String#trim()}.
1000   */
1001  @CheckReturnValue
1002  public String trimFrom(CharSequence sequence) {
1003    int len = sequence.length();
1004    int first;
1005    int last;
1006
1007    for (first = 0; first < len; first++) {
1008      if (!matches(sequence.charAt(first))) {
1009        break;
1010      }
1011    }
1012    for (last = len - 1; last > first; last--) {
1013      if (!matches(sequence.charAt(last))) {
1014        break;
1015      }
1016    }
1017
1018    return sequence.subSequence(first, last + 1).toString();
1019  }
1020
1021  /**
1022   * Returns a substring of the input character sequence that omits all characters this matcher
1023   * matches from the beginning of the string. For example: <pre> {@code
1024   *
1025   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1026   *
1027   * ... returns {@code "catbab"}.
1028   */
1029  @CheckReturnValue
1030  public String trimLeadingFrom(CharSequence sequence) {
1031    int len = sequence.length();
1032    int first;
1033
1034    for (first = 0; first < len; first++) {
1035      if (!matches(sequence.charAt(first))) {
1036        break;
1037      }
1038    }
1039
1040    return sequence.subSequence(first, len).toString();
1041  }
1042
1043  /**
1044   * Returns a substring of the input character sequence that omits all characters this matcher
1045   * matches from the end of the string. For example: <pre> {@code
1046   *
1047   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1048   *
1049   * ... returns {@code "abacat"}.
1050   */
1051  @CheckReturnValue
1052  public String trimTrailingFrom(CharSequence sequence) {
1053    int len = sequence.length();
1054    int last;
1055
1056    for (last = len - 1; last >= 0; last--) {
1057      if (!matches(sequence.charAt(last))) {
1058        break;
1059      }
1060    }
1061
1062    return sequence.subSequence(0, last + 1).toString();
1063  }
1064
1065  /**
1066   * Returns a string copy of the input character sequence, with each group of consecutive
1067   * characters that match this matcher replaced by a single replacement character. For example:
1068   * <pre>   {@code
1069   *
1070   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1071   *
1072   * ... returns {@code "b-p-r"}.
1073   *
1074   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1075   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1076   * character.
1077   *
1078   * @param sequence the character sequence to replace matching groups of characters in
1079   * @param replacement the character to append to the result string in place of each group of
1080   *        matching characters in {@code sequence}
1081   * @return the new string
1082   */
1083  @CheckReturnValue
1084  public String collapseFrom(CharSequence sequence, char replacement) {
1085    int first = indexIn(sequence);
1086    if (first == -1) {
1087      return sequence.toString();
1088    }
1089
1090    // TODO(kevinb): see if this implementation can be made faster
1091    StringBuilder builder = new StringBuilder(sequence.length())
1092        .append(sequence.subSequence(0, first))
1093        .append(replacement);
1094    boolean in = true;
1095    for (int i = first + 1; i < sequence.length(); i++) {
1096      char c = sequence.charAt(i);
1097      if (apply(c)) {
1098        if (!in) {
1099          builder.append(replacement);
1100          in = true;
1101        }
1102      } else {
1103        builder.append(c);
1104        in = false;
1105      }
1106    }
1107    return builder.toString();
1108  }
1109
1110  /**
1111   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1112   * groups of matching characters at the start or end of the sequence are removed without
1113   * replacement.
1114   */
1115  @CheckReturnValue
1116  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1117    int first = negate().indexIn(sequence);
1118    if (first == -1) {
1119      return ""; // everything matches. nothing's left.
1120    }
1121    StringBuilder builder = new StringBuilder(sequence.length());
1122    boolean inMatchingGroup = false;
1123    for (int i = first; i < sequence.length(); i++) {
1124      char c = sequence.charAt(i);
1125      if (apply(c)) {
1126        inMatchingGroup = true;
1127      } else {
1128        if (inMatchingGroup) {
1129          builder.append(replacement);
1130          inMatchingGroup = false;
1131        }
1132        builder.append(c);
1133      }
1134    }
1135    return builder.toString();
1136  }
1137
1138  // Predicate interface
1139
1140  /**
1141   * Returns {@code true} if this matcher matches the given character.
1142   *
1143   * @throws NullPointerException if {@code character} is null
1144   */
1145  @Override public boolean apply(Character character) {
1146    return matches(character);
1147  }
1148}
1149