/* * Copyright (C) 2008 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.android.mail.common.base; import static com.google.android.mail.common.base.Preconditions.checkArgument; import static com.google.android.mail.common.base.Preconditions.checkNotNull; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * Determines a true or false value for any Java {@code char} value, just as * {@link Predicate} does for any {@link Object}. Also offers basic text * processing methods based on this function. Implementations are strongly * encouraged to be side-effect-free and immutable. * *

Throughout the documentation of this class, the phrase "matching * character" is used to mean "any character {@code c} for which {@code * this.matches(c)} returns {@code true}". * *

Note: This class deals only with {@code char} values; it does not * understand supplementary Unicode code points in the range {@code 0x10000} to * {@code 0x10FFFF}. Such logical characters are encoded into a {@code String} * using surrogate pairs, and a {@code CharMatcher} treats these just as two * separate characters. * * @author Kevin Bourrillion * @since 2009.09.15 tentative */ public abstract class CharMatcher implements Predicate { // Constants // Excludes 2000-2000a, which is handled as a range private static final String BREAKING_WHITESPACE_CHARS = "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000"; // Excludes 2007, which is handled as a gap in a pair of ranges private static final String NON_BREAKING_WHITESPACE_CHARS = "\u00a0\u180e\u202f"; /** * Determines whether a character is whitespace according to the latest * Unicode standard, as illustrated * here. * This is not the same definition used by other Java APIs. See a comparison * of several definitions of "whitespace" at * (TODO). * *

Note: as the Unicode definition evolves, we will modify this * constant to keep it up to date. */ public static final CharMatcher WHITESPACE = anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS) .or(inRange('\u2000', '\u200a')); /** * Determines whether a character is a breaking whitespace (that is, * a whitespace which can be interpreted as a break between words * for formatting purposes). See {@link #WHITESPACE} for a discussion * of that term. * * @since 2010.01.04 tentative */ public static final CharMatcher BREAKING_WHITESPACE = anyOf(BREAKING_WHITESPACE_CHARS) .or(inRange('\u2000', '\u2006')) .or(inRange('\u2008', '\u200a')); /** * Determines whether a character is ASCII, meaning that its code point is * less than 128. */ public static final CharMatcher ASCII = inRange('\0', '\u007f'); /** * Determines whether a character is a digit according to * Unicode. */ public static final CharMatcher DIGIT; static { CharMatcher digit = inRange('0', '9'); String zeroes = "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66" + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946" + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10"; for (char base : zeroes.toCharArray()) { digit = digit.or(inRange(base, (char) (base + 9))); } DIGIT = digit; } /** * Determines whether a character is whitespace according to {@link * Character#isWhitespace(char) Java's definition}; it is usually preferable * to use {@link #WHITESPACE}. See a comparison of several definitions of * "whitespace" at go/white+space. */ public static final CharMatcher JAVA_WHITESPACE = inRange('\u0009', (char) 13) // \\u000d doesn't work as a char literal .or(inRange('\u001c', '\u0020')) .or(is('\u1680')) .or(is('\u180e')) .or(inRange('\u2000', '\u2006')) .or(inRange('\u2008', '\u200b')) .or(inRange('\u2028', '\u2029')) .or(is('\u205f')) .or(is('\u3000')); /** * Determines whether a character is a digit according to {@link * Character#isDigit(char) Java's definition}. If you only care to match * ASCII digits, you can use {@code inRange('0', '9')}. */ public static final CharMatcher JAVA_DIGIT = new CharMatcher() { @Override public boolean matches(char c) { return Character.isDigit(c); } }; /** * Determines whether a character is a letter according to {@link * Character#isLetter(char) Java's definition}. If you only care to match * letters of the Latin alphabet, you can use {@code * inRange('a', 'z').or(inRange('A', 'Z'))}. */ public static final CharMatcher JAVA_LETTER = new CharMatcher() { @Override public boolean matches(char c) { return Character.isLetter(c); } }; /** * Determines whether a character is a letter or digit according to {@link * Character#isLetterOrDigit(char) Java's definition}. */ public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() { @Override public boolean matches(char c) { return Character.isLetterOrDigit(c); } }; /** * Determines whether a character is upper case according to {@link * Character#isUpperCase(char) Java's definition}. */ public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() { @Override public boolean matches(char c) { return Character.isUpperCase(c); } }; /** * Determines whether a character is lower case according to {@link * Character#isLowerCase(char) Java's definition}. */ public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() { @Override public boolean matches(char c) { return Character.isLowerCase(c); } }; /** * Determines whether a character is an ISO control character according to * {@link Character#isISOControl(char)}. */ public static final CharMatcher JAVA_ISO_CONTROL = inRange('\u0000', '\u001f') .or(inRange('\u007f', '\u009f')); /** * Determines whether a character is invisible; that is, if its Unicode * category is any of SPACE_SEPARATOR, LINE_SEPARATOR, * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according * to ICU4J. */ public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020') .or(inRange('\u007f', '\u00a0')) .or(is('\u00ad')) .or(inRange('\u0600', '\u0603')) .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e")) .or(inRange('\u2000', '\u200f')) .or(inRange('\u2028', '\u202f')) .or(inRange('\u205f', '\u2064')) .or(inRange('\u206a', '\u206f')) .or(is('\u3000')) .or(inRange('\ud800', '\uf8ff')) .or(anyOf("\ufeff\ufff9\ufffa\ufffb")); /** * Determines whether a character is single-width (not double-width). When * in doubt, this matcher errs on the side of returning {@code false} (that * is, it tends to assume a character is double-width). * * Note: as the reference file evolves, we will modify this constant * to keep it up to date. */ public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9') .or(is('\u05be')) .or(inRange('\u05d0', '\u05ea')) .or(is('\u05f3')) .or(is('\u05f4')) .or(inRange('\u0600', '\u06ff')) .or(inRange('\u0750', '\u077f')) .or(inRange('\u0e00', '\u0e7f')) .or(inRange('\u1e00', '\u20af')) .or(inRange('\u2100', '\u213a')) .or(inRange('\ufb50', '\ufdff')) .or(inRange('\ufe70', '\ufeff')) .or(inRange('\uff61', '\uffdc')); /** * Determines whether a character is whitespace according to an arbitrary definition used by * {@link StringUtil} for years. Most likely you don't want to use this. See a comparison of * several definitions of "whitespace" at goto/white space. * *

To be deprecated. use {@link #WHITESPACE} to switch to the Unicode definition, or * create a matcher for the specific characters you want. Not deprecated yet because it is a * stepping stone for getting off of many deprecated {@link StringUtil} methods. */ @Deprecated public static final CharMatcher LEGACY_WHITESPACE = anyOf(" \r\n\t\u3000\u00A0\u2007\u202F").precomputed(); /** Matches any character. */ public static final CharMatcher ANY = new CharMatcher() { @Override public boolean matches(char c) { return true; } @Override public int indexIn(CharSequence sequence) { return (sequence.length() == 0) ? -1 : 0; } @Override public int indexIn(CharSequence sequence, int start) { int length = sequence.length(); Preconditions.checkPositionIndex(start, length); return (start == length) ? -1 : start; } @Override public int lastIndexIn(CharSequence sequence) { return sequence.length() - 1; } @Override public boolean matchesAllOf(CharSequence sequence) { checkNotNull(sequence); return true; } @Override public boolean matchesNoneOf(CharSequence sequence) { return sequence.length() == 0; } @Override public String removeFrom(CharSequence sequence) { checkNotNull(sequence); return ""; } @Override public String replaceFrom( CharSequence sequence, char replacement) { char[] array = new char[sequence.length()]; Arrays.fill(array, replacement); return new String(array); } @Override public String replaceFrom( CharSequence sequence, CharSequence replacement) { StringBuilder retval = new StringBuilder(sequence.length() * replacement.length()); for (int i = 0; i < sequence.length(); i++) { retval.append(replacement); } return retval.toString(); } @Override public String collapseFrom(CharSequence sequence, char replacement) { return (sequence.length() == 0) ? "" : String.valueOf(replacement); } @Override public String trimFrom(CharSequence sequence) { checkNotNull(sequence); return ""; } @Override public int countIn(CharSequence sequence) { return sequence.length(); } @Override public CharMatcher and(CharMatcher other) { return checkNotNull(other); } @Override public CharMatcher or(CharMatcher other) { checkNotNull(other); return this; } @Override public CharMatcher negate() { return NONE; } @Override public CharMatcher precomputed() { return this; } }; /** Matches no characters. */ public static final CharMatcher NONE = new CharMatcher() { @Override public boolean matches(char c) { return false; } @Override public int indexIn(CharSequence sequence) { checkNotNull(sequence); return -1; } @Override public int indexIn(CharSequence sequence, int start) { int length = sequence.length(); Preconditions.checkPositionIndex(start, length); return -1; } @Override public int lastIndexIn(CharSequence sequence) { checkNotNull(sequence); return -1; } @Override public boolean matchesAllOf(CharSequence sequence) { return sequence.length() == 0; } @Override public boolean matchesNoneOf(CharSequence sequence) { checkNotNull(sequence); return true; } @Override public String removeFrom(CharSequence sequence) { return sequence.toString(); } @Override public String replaceFrom( CharSequence sequence, char replacement) { return sequence.toString(); } @Override public String replaceFrom( CharSequence sequence, CharSequence replacement) { checkNotNull(replacement); return sequence.toString(); } @Override public String collapseFrom( CharSequence sequence, char replacement) { return sequence.toString(); } @Override public String trimFrom(CharSequence sequence) { return sequence.toString(); } @Override public int countIn(CharSequence sequence) { checkNotNull(sequence); return 0; } @Override public CharMatcher and(CharMatcher other) { checkNotNull(other); return this; } @Override public CharMatcher or(CharMatcher other) { return checkNotNull(other); } @Override public CharMatcher negate() { return ANY; } @Override protected void setBits(LookupTable table) { } @Override public CharMatcher precomputed() { return this; } }; // Static factories /** * Returns a {@code char} matcher that matches only one specified character. */ public static CharMatcher is(final char match) { return new CharMatcher() { @Override public boolean matches(char c) { return c == match; } @Override public String replaceFrom( CharSequence sequence, char replacement) { return sequence.toString().replace(match, replacement); } @Override public CharMatcher and(CharMatcher other) { return other.matches(match) ? this : NONE; } @Override public CharMatcher or(CharMatcher other) { return other.matches(match) ? other : super.or(other); } @Override public CharMatcher negate() { return isNot(match); } @Override protected void setBits(LookupTable table) { table.set(match); } @Override public CharMatcher precomputed() { return this; } }; } /** * Returns a {@code char} matcher that matches any character except the one * specified. * *

To negate another {@code CharMatcher}, use {@link #negate()}. */ public static CharMatcher isNot(final char match) { return new CharMatcher() { @Override public boolean matches(char c) { return c != match; } @Override public CharMatcher and(CharMatcher other) { return other.matches(match) ? super.and(other) : other; } @Override public CharMatcher or(CharMatcher other) { return other.matches(match) ? ANY : this; } @Override public CharMatcher negate() { return is(match); } }; } /** * Returns a {@code char} matcher that matches any character present in the * given character sequence. */ public static CharMatcher anyOf(final CharSequence sequence) { switch (sequence.length()) { case 0: return NONE; case 1: return is(sequence.charAt(0)); case 2: final char match1 = sequence.charAt(0); final char match2 = sequence.charAt(1); return new CharMatcher() { @Override public boolean matches(char c) { return c == match1 || c == match2; } @Override protected void setBits(LookupTable table) { table.set(match1); table.set(match2); } @Override public CharMatcher precomputed() { return this; } }; } final char[] chars = sequence.toString().toCharArray(); Arrays.sort(chars); // not worth collapsing duplicates return new CharMatcher() { @Override public boolean matches(char c) { return Arrays.binarySearch(chars, c) >= 0; } @Override protected void setBits(LookupTable table) { for (char c : chars) { table.set(c); } } }; } /** * Returns a {@code char} matcher that matches any character not present in * the given character sequence. */ public static CharMatcher noneOf(CharSequence sequence) { return anyOf(sequence).negate(); } /** * Returns a {@code char} matcher that matches any character in a given range * (both endpoints are inclusive). For example, to match any lowercase letter * of the English alphabet, use {@code CharMatcher.inRange('a', 'z')}. * * @throws IllegalArgumentException if {@code endInclusive < startInclusive} */ public static CharMatcher inRange( final char startInclusive, final char endInclusive) { checkArgument(endInclusive >= startInclusive); return new CharMatcher() { @Override public boolean matches(char c) { return startInclusive <= c && c <= endInclusive; } @Override protected void setBits(LookupTable table) { char c = startInclusive; while (true) { table.set(c); if (c++ == endInclusive) { break; } } } @Override public CharMatcher precomputed() { return this; } }; } /** * Returns a matcher with identical behavior to the given {@link * Character}-based predicate, but which operates on primitive {@code char} * instances instead. */ public static CharMatcher forPredicate( final Predicate predicate) { checkNotNull(predicate); if (predicate instanceof CharMatcher) { return (CharMatcher) predicate; } return new CharMatcher() { @Override public boolean matches(char c) { return predicate.apply(c); } @Override public boolean apply(Character character) { return predicate.apply(checkNotNull(character)); } }; } // Abstract methods /** Determines a true or false value for the given character. */ public abstract boolean matches(char c); // Non-static factories /** * Returns a matcher that matches any character not matched by this matcher. */ public CharMatcher negate() { final CharMatcher original = this; return new CharMatcher() { @Override public boolean matches(char c) { return !original.matches(c); } @Override public boolean matchesAllOf(CharSequence sequence) { return original.matchesNoneOf(sequence); } @Override public boolean matchesNoneOf(CharSequence sequence) { return original.matchesAllOf(sequence); } @Override public int countIn(CharSequence sequence) { return sequence.length() - original.countIn(sequence); } @Override public CharMatcher negate() { return original; } }; } /** * Returns a matcher that matches any character matched by both this matcher * and {@code other}. */ public CharMatcher and(CharMatcher other) { return new And(Arrays.asList(this, checkNotNull(other))); } private static class And extends CharMatcher { List components; And(List components) { this.components = components; // Skip defensive copy (private) } @Override public boolean matches(char c) { for (CharMatcher matcher : components) { if (!matcher.matches(c)) { return false; } } return true; } @Override public CharMatcher and(CharMatcher other) { List newComponents = new ArrayList(components); newComponents.add(checkNotNull(other)); return new And(newComponents); } } /** * Returns a matcher that matches any character matched by either this matcher * or {@code other}. */ public CharMatcher or(CharMatcher other) { return new Or(Arrays.asList(this, checkNotNull(other))); } private static class Or extends CharMatcher { List components; Or(List components) { this.components = components; // Skip defensive copy (private) } @Override public boolean matches(char c) { for (CharMatcher matcher : components) { if (matcher.matches(c)) { return true; } } return false; } @Override public CharMatcher or(CharMatcher other) { List newComponents = new ArrayList(components); newComponents.add(checkNotNull(other)); return new Or(newComponents); } @Override protected void setBits(LookupTable table) { for (CharMatcher matcher : components) { matcher.setBits(table); } } } /** * Returns a {@code char} matcher functionally equivalent to this one, but * which may be faster to query than the original; your mileage may vary. * Precomputation takes time and is likely to be worthwhile only if the * precomputed matcher is queried many thousands of times. * *

This method has no effect (returns {@code this}) when called in GWT: * it's unclear whether a precomputed matcher is faster, but it certainly * consumes more memory, which doesn't seem like a worthwhile tradeoff in a * browser. */ public CharMatcher precomputed() { return Platform.precomputeCharMatcher(this); } /** * This is the actual implementation of {@link #precomputed}, but we bounce * calls through a method on {@link Platform} so that we can have different * behavior in GWT. * *

The default precomputation is to cache the configuration of the original * matcher in an eight-kilobyte bit array. In some situations this produces a * matcher which is faster to query than the original. * *

The default implementation creates a new bit array and passes it to * {@link #setBits(LookupTable)}. */ CharMatcher precomputedInternal() { final LookupTable table = new LookupTable(); setBits(table); return new CharMatcher() { @Override public boolean matches(char c) { return table.get(c); } // TODO: make methods like negate() smart @Override public CharMatcher precomputed() { return this; } }; } /** * For use by implementors; sets the bit corresponding to each character ('\0' * to '{@literal \}uFFFF') that matches this matcher in the given bit array, * leaving all other bits untouched. * *

The default implementation loops over every possible character value, * invoking {@link #matches} for each one. */ protected void setBits(LookupTable table) { char c = Character.MIN_VALUE; while (true) { if (matches(c)) { table.set(c); } if (c++ == Character.MAX_VALUE) { break; } } } /** * A bit array with one bit per {@code char} value, used by {@link * CharMatcher#precomputed}. * *

TODO: possibly share a common BitArray class with BloomFilter * and others... a simpler java.util.BitSet. */ protected static class LookupTable { int[] data = new int[2048]; void set(char index) { data[index >> 5] |= (1 << index); } boolean get(char index) { return (data[index >> 5] & (1 << index)) != 0; } } // Text processing routines /** * Returns {@code true} if a character sequence contains only matching * characters. * *

The default implementation iterates over the sequence, invoking {@link * #matches} for each character, until this returns {@code false} or the end * is reached. * * @param sequence the character sequence to examine, possibly empty * @return {@code true} if this matcher matches every character in the * sequence, including when the sequence is empty */ public boolean matchesAllOf(CharSequence sequence) { for (int i = sequence.length() - 1; i >= 0; i--) { if (!matches(sequence.charAt(i))) { return false; } } return true; } /** * Returns {@code true} if a character sequence contains no matching * characters. * *

The default implementation iterates over the sequence, invoking {@link * #matches} for each character, until this returns {@code false} or the end is * reached. * * @param sequence the character sequence to examine, possibly empty * @return {@code true} if this matcher matches every character in the * sequence, including when the sequence is empty */ public boolean matchesNoneOf(CharSequence sequence) { return indexIn(sequence) == -1; } // TODO: perhaps add matchesAnyOf() /** * Returns the index of the first matching character in a character sequence, * or {@code -1} if no matching character is present. * *

The default implementation iterates over the sequence in forward order * calling {@link #matches} for each character. * * @param sequence the character sequence to examine from the beginning * @return an index, or {@code -1} if no character matches */ public int indexIn(CharSequence sequence) { int length = sequence.length(); for (int i = 0; i < length; i++) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * Returns the index of the first matching character in a character sequence, * starting from a given position, or {@code -1} if no character matches after * that position. * *

The default implementation iterates over the sequence in forward order, * beginning at {@code start}, calling {@link #matches} for each character. * * @param sequence the character sequence to examine * @param start the first index to examine; must be nonnegative and no * greater than {@code sequence.length()} * @return the index of the first matching character, guaranteed to be no less * than {@code start}, or {@code -1} if no character matches * @throws IndexOutOfBoundsException if start is negative or greater than * {@code sequence.length()} */ public int indexIn(CharSequence sequence, int start) { int length = sequence.length(); Preconditions.checkPositionIndex(start, length); for (int i = start; i < length; i++) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * Returns the index of the last matching character in a character sequence, * or {@code -1} if no matching character is present. * *

The default implementation iterates over the sequence in reverse order * calling {@link #matches} for each character. * * @param sequence the character sequence to examine from the end * @return an index, or {@code -1} if no character matches */ public int lastIndexIn(CharSequence sequence) { for (int i = sequence.length() - 1; i >= 0; i--) { if (matches(sequence.charAt(i))) { return i; } } return -1; } /** * Returns the number of matching characters found in a character sequence. */ public int countIn(CharSequence sequence) { int count = 0; for (int i = 0; i < sequence.length(); i++) { if (matches(sequence.charAt(i))) { count++; } } return count; } /** * Returns a string containing all non-matching characters of a character * sequence, in order. For example:

   {@code
   *
   *   CharMatcher.is('a').removeFrom("bazaar")}

* * ... returns {@code "bzr"}. */ public String removeFrom(CharSequence sequence) { String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } char[] chars = string.toCharArray(); int spread = 1; // This unusual loop comes from extensive benchmarking OUT: while (true) { pos++; while (true) { if (pos == chars.length) { break OUT; } if (matches(chars[pos])) { break; } chars[pos - spread] = chars[pos]; pos++; } spread++; } return new String(chars, 0, pos - spread); } /** * Returns a string containing all matching characters of a character * sequence, in order. For example:

   {@code
   *
   *   CharMatcher.is('a').retainFrom("bazaar")}

* * ... returns {@code "aaa"}. */ public String retainFrom(CharSequence sequence) { return negate().removeFrom(sequence); } /** * Returns a string copy of the input character sequence, with each character * that matches this matcher replaced by a given replacement character. For * example:

   {@code
   *
   *   CharMatcher.is('a').replaceFrom("radar", 'o')}

* * ... returns {@code "rodor"}. * *

   {@code
   *
   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}

* * ... returns {@code "yoohoo"}. * *

Note: If the replacement is a fixed string with only one character, * you are better off calling {@link #replaceFrom(CharSequence, char)} directly. * * @param sequence the character sequence to replace matching characters in * @param replacement the characters to append to the result string in place * of each matching character in {@code sequence} * @return the new string */ public String replaceFrom(CharSequence sequence, CharSequence replacement) { int replacementLen = replacement.length(); if (replacementLen == 0) { return removeFrom(sequence); } if (replacementLen == 1) { return replaceFrom(sequence, replacement.charAt(0)); } String string = sequence.toString(); int pos = indexIn(string); if (pos == -1) { return string; } int len = string.length(); StringBuilder buf = new StringBuilder((int) (len * 1.5) + 16); int oldpos = 0; do { buf.append(string, oldpos, pos); buf.append(replacement); oldpos = pos + 1; pos = indexIn(string, oldpos); } while (pos != -1); buf.append(string, oldpos, len); return buf.toString(); } /** * Returns a substring of the input character sequence that omits all * characters this matcher matches from the beginning and from the end of the * string. For example:

 {@code
   *
   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}

* * ... returns {@code "cat"}. * *

Note that

   {@code
   *
   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}

* * ... is equivalent to {@link String#trim()}. */ public String trimFrom(CharSequence sequence) { int len = sequence.length(); int first; int last; for (first = 0; first < len; first++) { if (!matches(sequence.charAt(first))) { break; } } for (last = len - 1; last > first; last--) { if (!matches(sequence.charAt(last))) { break; } } return sequence.subSequence(first, last + 1).toString(); } /** * Returns a substring of the input character sequence that omits all * characters this matcher matches from the beginning of the * string. For example:

 {@code
   *
   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}

* * ... returns {@code "catbab"}. */ public String trimLeadingFrom(CharSequence sequence) { int len = sequence.length(); int first; for (first = 0; first < len; first++) { if (!matches(sequence.charAt(first))) { break; } } return sequence.subSequence(first, len).toString(); } /** * Returns a substring of the input character sequence that omits all * characters this matcher matches from the end of the * string. For example:

 {@code
   *
   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}

* * ... returns {@code "abacat"}. */ public String trimTrailingFrom(CharSequence sequence) { int len = sequence.length(); int last; for (last = len - 1; last >= 0; last--) { if (!matches(sequence.charAt(last))) { break; } } return sequence.subSequence(0, last + 1).toString(); } /** * Returns a string copy of the input character sequence, with each group of * consecutive characters that match this matcher replaced by a single * replacement character. For example:

   {@code
   *
   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}

* * ... returns {@code "b-p-r"}. * *

The default implementation uses {@link #indexIn(CharSequence)} to find * the first matching character, then iterates the remainder of the sequence * calling {@link #matches(char)} for each character. * * @param sequence the character sequence to replace matching groups of * characters in * @param replacement the character to append to the result string in place of * each group of matching characters in {@code sequence} * @return the new string */ public String collapseFrom(CharSequence sequence, char replacement) { int first = indexIn(sequence); if (first == -1) { return sequence.toString(); } // TODO: this implementation can probably be made faster. StringBuilder builder = new StringBuilder(sequence.length()) .append(sequence.subSequence(0, first)) .append(replacement); boolean in = true; for (int i = first + 1; i < sequence.length(); i++) { char c = sequence.charAt(i); if (apply(c)) { if (!in) { builder.append(replacement); in = true; } } else { builder.append(c); in = false; } } return builder.toString(); } /** * Collapses groups of matching characters exactly as {@link #collapseFrom} * does, except that groups of matching characters at the start or end of the * sequence are removed without replacement. */ public String trimAndCollapseFrom(CharSequence sequence, char replacement) { int first = negate().indexIn(sequence); if (first == -1) { return ""; // everything matches. nothing's left. } StringBuilder builder = new StringBuilder(sequence.length()); boolean inMatchingGroup = false; for (int i = first; i < sequence.length(); i++) { char c = sequence.charAt(i); if (apply(c)) { inMatchingGroup = true; } else { if (inMatchingGroup) { builder.append(replacement); inMatchingGroup = false; } builder.append(c); } } return builder.toString(); } // Predicate interface /** * Returns {@code true} if this matcher matches the given character. * * @throws NullPointerException if {@code character} is null */ /*@Override*/ public boolean apply(Character character) { return matches(character); } }